default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64


EXTERN  OPENSSL_ia32cap_P
global  aesni_cbc_sha256_enc

ALIGN   16
;-----------------------------------------------------------------------
; aesni_cbc_sha256_enc -- capability probe / dispatcher
;
; In:    rcx = first argument (Win64 argument register; the routines in
;              this file carry WIN64 prologues)
; Out:   rcx == 0 : returns with eax = 1
;        rcx != 0 : tail-jumps to one specialised implementation picked
;                   from the OPENSSL_ia32cap_P capability words; if no
;                   candidate matches, execution stops on ud2
; Clobb: rax, r10, r11, flags
;
; The meaning attached to each capability bit below is taken from the
; names of the jump targets; NOTE(review): confirm the bit assignments
; against the OPENSSL_ia32cap documentation.
;-----------------------------------------------------------------------
aesni_cbc_sha256_enc:

        mov     eax, 1                          ; return value for the rcx == 0 probe
        lea     r11, [rel OPENSSL_ia32cap_P]
        test    rcx, rcx
        jz      NEAR $L$probe

        mov     eax, DWORD [r11]                ; capability dword 0 (not tested in this routine)
        mov     r10, QWORD [r11 + 4]            ; low half = dword 1, high half = dword 2
        bt      r10, 61                         ; dword 2, bit 29 -> SHA-extension variant
        jc      NEAR aesni_cbc_sha256_enc_shaext
        mov     r11, r10
        shr     r11, 32                         ; r11d = capability dword 2

        test    r10d, 1 << 11                   ; dword 1, bit 11 -> XOP variant
        jnz     NEAR aesni_cbc_sha256_enc_xop
        and     r11d, (1 << 8) | (1 << 5) | (1 << 3)
        cmp     r11d, (1 << 8) | (1 << 5) | (1 << 3)   ; all three dword-2 bits required
        je      NEAR aesni_cbc_sha256_enc_avx2
        and     r10d, 1 << 28                   ; dword 1, bit 28 -> AVX variant
        jnz     NEAR aesni_cbc_sha256_enc_avx
        ud2                                     ; no supported variant: fault deliberately

        ; Nothing branches to the next four instructions and the ud2
        ; above faults, so they are not reached by normal control flow.
        xor     eax, eax
        test    rcx, rcx
        jz      NEAR $L$probe
        ud2
$L$probe:
        DB      0F3h,0C3h               ;repret



section .rdata rdata align=64
ALIGN   64

;-----------------------------------------------------------------------
; K256 -- read-only constants for the AES-CBC + SHA-256 routines.
;
; Layout (byte offsets from K256; code in this file addresses the table
; by these fixed offsets, so the layout must not change):
;
;   +0   .. +511  The 64 SHA-256 round constants, four per 16-byte row,
;                 with every row emitted twice back to back.  The XOP
;                 code reads rows at +0/+32/+64/+96 from its table
;                 pointer and advances that pointer by 128 bytes, i.e.
;                 it uses one copy of each row.  The duplicate copy is
;                 presumably for wider-vector variants not visible
;                 here -- TODO confirm.
;   +512 .. +543  Byte-shuffle mask (two identical 16-byte copies).
;                 Used as a vpshufb control; as stored little-endian it
;                 reverses the byte order inside each 32-bit lane.
;   +544 .. +623  80 bytes: 32 x 0x00, 16 x 0xFF, 32 x 0x00.  The XOP
;                 code loads three 16-byte windows at
;                 K256+544 + 8*n + {0,16,32}, where n is the dword at
;                 key+240 minus 9, and ANDs them with three
;                 vaesenclast results before ORing those together.
;                 Presumably this keeps exactly the last-round output
;                 that matches the key length -- TODO confirm.
;   +624 ..       NUL-terminated ASCII identification string.
;-----------------------------------------------------------------------
K256:
        ; SHA-256 round constants K[0..63]; each row appears twice.
        DD      0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
        DD      0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
        DD      0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
        DD      0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
        DD      0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
        DD      0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
        DD      0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
        DD      0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
        DD      0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
        DD      0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
        DD      0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
        DD      0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
        DD      0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
        DD      0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
        DD      0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
        DD      0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
        DD      0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
        DD      0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
        DD      0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
        DD      0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
        DD      0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
        DD      0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
        DD      0xd192e819,0xd6990624,0xf40e3585,0x106aa070
        DD      0xd192e819,0xd6990624,0xf40e3585,0x106aa070
        DD      0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
        DD      0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
        DD      0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
        DD      0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
        DD      0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
        DD      0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
        DD      0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
        DD      0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

        ; K256+512: vpshufb mask reversing the bytes of each 32-bit lane.
        DD      0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
        DD      0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
        ; K256+544: zero / all-ones / zero run that the sliding 16-byte
        ; mask windows are loaded from.
        DD      0,0,0,0,0,0,0,0,-1,-1,-1,-1
        DD      0,0,0,0,0,0,0,0
; ASCII: "AESNI-CBC+SHA256 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated.
DB      65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54
DB      32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95
DB      54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98
DB      121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108
DB      46,111,114,103,62,0
ALIGN   64
; Back to the code section for the routines that follow.
section .text

ALIGN   64
aesni_cbc_sha256_enc_xop:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_aesni_cbc_sha256_enc_xop:
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8
        mov     rcx,r9
        mov     r8,QWORD[40+rsp]
        mov     r9,QWORD[48+rsp]



$L$xop_shortcut:
        mov     r10,QWORD[56+rsp]
        mov     rax,rsp

        push    rbx

        push    rbp

        push    r12

        push    r13

        push    r14

        push    r15

        sub     rsp,288
        and     rsp,-64

        shl     rdx,6
        sub     rsi,rdi
        sub     r10,rdi
        add     rdx,rdi


        mov     QWORD[((64+8))+rsp],rsi
        mov     QWORD[((64+16))+rsp],rdx

        mov     QWORD[((64+32))+rsp],r8
        mov     QWORD[((64+40))+rsp],r9
        mov     QWORD[((64+48))+rsp],r10
        mov     QWORD[120+rsp],rax

        movaps  XMMWORD[128+rsp],xmm6
        movaps  XMMWORD[144+rsp],xmm7
        movaps  XMMWORD[160+rsp],xmm8
        movaps  XMMWORD[176+rsp],xmm9
        movaps  XMMWORD[192+rsp],xmm10
        movaps  XMMWORD[208+rsp],xmm11
        movaps  XMMWORD[224+rsp],xmm12
        movaps  XMMWORD[240+rsp],xmm13
        movaps  XMMWORD[256+rsp],xmm14
        movaps  XMMWORD[272+rsp],xmm15
$L$prologue_xop:
        vzeroall

        mov     r12,rdi
        lea     rdi,[128+rcx]
        lea     r13,[((K256+544))]
        mov     r14d,DWORD[((240-128))+rdi]
        mov     r15,r9
        mov     rsi,r10
        vmovdqu xmm8,XMMWORD[r8]
        sub     r14,9

        mov     eax,DWORD[r15]
        mov     ebx,DWORD[4+r15]
        mov     ecx,DWORD[8+r15]
        mov     edx,DWORD[12+r15]
        mov     r8d,DWORD[16+r15]
        mov     r9d,DWORD[20+r15]
        mov     r10d,DWORD[24+r15]
        mov     r11d,DWORD[28+r15]

        vmovdqa xmm14,XMMWORD[r14*8+r13]
        vmovdqa xmm13,XMMWORD[16+r14*8+r13]
        vmovdqa xmm12,XMMWORD[32+r14*8+r13]
        vmovdqu xmm10,XMMWORD[((0-128))+rdi]
        jmp     NEAR $L$loop_xop
ALIGN   16
$L$loop_xop:
        vmovdqa xmm7,XMMWORD[((K256+512))]
        vmovdqu xmm0,XMMWORD[r12*1+rsi]
        vmovdqu xmm1,XMMWORD[16+r12*1+rsi]
        vmovdqu xmm2,XMMWORD[32+r12*1+rsi]
        vmovdqu xmm3,XMMWORD[48+r12*1+rsi]
        vpshufb xmm0,xmm0,xmm7
        lea     rbp,[K256]
        vpshufb xmm1,xmm1,xmm7
        vpshufb xmm2,xmm2,xmm7
        vpaddd  xmm4,xmm0,XMMWORD[rbp]
        vpshufb xmm3,xmm3,xmm7
        vpaddd  xmm5,xmm1,XMMWORD[32+rbp]
        vpaddd  xmm6,xmm2,XMMWORD[64+rbp]
        vpaddd  xmm7,xmm3,XMMWORD[96+rbp]
        vmovdqa XMMWORD[rsp],xmm4
        mov     r14d,eax
        vmovdqa XMMWORD[16+rsp],xmm5
        mov     esi,ebx
        vmovdqa XMMWORD[32+rsp],xmm6
        xor     esi,ecx
        vmovdqa XMMWORD[48+rsp],xmm7
        mov     r13d,r8d
        jmp     NEAR $L$xop_00_47

ALIGN   16
$L$xop_00_47:
        sub     rbp,-16*2*4
        vmovdqu xmm9,XMMWORD[r12]
        mov     QWORD[((64+0))+rsp],r12
        vpalignr        xmm4,xmm1,xmm0,4
        ror     r13d,14
        mov     eax,r14d
        vpalignr        xmm7,xmm3,xmm2,4
        mov     r12d,r9d
        xor     r13d,r8d
DB      143,232,120,194,236,14
        ror     r14d,9
        xor     r12d,r10d
        vpsrld  xmm4,xmm4,3
        ror     r13d,5
        xor     r14d,eax
        vpaddd  xmm0,xmm0,xmm7
        and     r12d,r8d
        vpxor   xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((16-128))+rdi]
        xor     r13d,r8d
        add     r11d,DWORD[rsp]
        mov     r15d,eax
DB      143,232,120,194,245,11
        ror     r14d,11
        xor     r12d,r10d
        vpxor   xmm4,xmm4,xmm5
        xor     r15d,ebx
        ror     r13d,6
        add     r11d,r12d
        and     esi,r15d
DB      143,232,120,194,251,13
        xor     r14d,eax
        add     r11d,r13d
        vpxor   xmm4,xmm4,xmm6
        xor     esi,ebx
        add     edx,r11d
        vpsrld  xmm6,xmm3,10
        ror     r14d,2
        add     r11d,esi
        vpaddd  xmm0,xmm0,xmm4
        mov     r13d,edx
        add     r14d,r11d
DB      143,232,120,194,239,2
        ror     r13d,14
        mov     r11d,r14d
        vpxor   xmm7,xmm7,xmm6
        mov     r12d,r8d
        xor     r13d,edx
        ror     r14d,9
        xor     r12d,r9d
        vpxor   xmm7,xmm7,xmm5
        ror     r13d,5
        xor     r14d,r11d
        and     r12d,edx
        vpxor   xmm9,xmm9,xmm8
        xor     r13d,edx
        vpsrldq xmm7,xmm7,8
        add     r10d,DWORD[4+rsp]
        mov     esi,r11d
        ror     r14d,11
        xor     r12d,r9d
        vpaddd  xmm0,xmm0,xmm7
        xor     esi,eax
        ror     r13d,6
        add     r10d,r12d
        and     r15d,esi
DB      143,232,120,194,248,13
        xor     r14d,r11d
        add     r10d,r13d
        vpsrld  xmm6,xmm0,10
        xor     r15d,eax
        add     ecx,r10d
DB      143,232,120,194,239,2
        ror     r14d,2
        add     r10d,r15d
        vpxor   xmm7,xmm7,xmm6
        mov     r13d,ecx
        add     r14d,r10d
        ror     r13d,14
        mov     r10d,r14d
        vpxor   xmm7,xmm7,xmm5
        mov     r12d,edx
        xor     r13d,ecx
        ror     r14d,9
        xor     r12d,r8d
        vpslldq xmm7,xmm7,8
        ror     r13d,5
        xor     r14d,r10d
        and     r12d,ecx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((32-128))+rdi]
        xor     r13d,ecx
        vpaddd  xmm0,xmm0,xmm7
        add     r9d,DWORD[8+rsp]
        mov     r15d,r10d
        ror     r14d,11
        xor     r12d,r8d
        vpaddd  xmm6,xmm0,XMMWORD[rbp]
        xor     r15d,r11d
        ror     r13d,6
        add     r9d,r12d
        and     esi,r15d
        xor     r14d,r10d
        add     r9d,r13d
        xor     esi,r11d
        add     ebx,r9d
        ror     r14d,2
        add     r9d,esi
        mov     r13d,ebx
        add     r14d,r9d
        ror     r13d,14
        mov     r9d,r14d
        mov     r12d,ecx
        xor     r13d,ebx
        ror     r14d,9
        xor     r12d,edx
        ror     r13d,5
        xor     r14d,r9d
        and     r12d,ebx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((48-128))+rdi]
        xor     r13d,ebx
        add     r8d,DWORD[12+rsp]
        mov     esi,r9d
        ror     r14d,11
        xor     r12d,edx
        xor     esi,r10d
        ror     r13d,6
        add     r8d,r12d
        and     r15d,esi
        xor     r14d,r9d
        add     r8d,r13d
        xor     r15d,r10d
        add     eax,r8d
        ror     r14d,2
        add     r8d,r15d
        mov     r13d,eax
        add     r14d,r8d
        vmovdqa XMMWORD[rsp],xmm6
        vpalignr        xmm4,xmm2,xmm1,4
        ror     r13d,14
        mov     r8d,r14d
        vpalignr        xmm7,xmm0,xmm3,4
        mov     r12d,ebx
        xor     r13d,eax
DB      143,232,120,194,236,14
        ror     r14d,9
        xor     r12d,ecx
        vpsrld  xmm4,xmm4,3
        ror     r13d,5
        xor     r14d,r8d
        vpaddd  xmm1,xmm1,xmm7
        and     r12d,eax
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((64-128))+rdi]
        xor     r13d,eax
        add     edx,DWORD[16+rsp]
        mov     r15d,r8d
DB      143,232,120,194,245,11
        ror     r14d,11
        xor     r12d,ecx
        vpxor   xmm4,xmm4,xmm5
        xor     r15d,r9d
        ror     r13d,6
        add     edx,r12d
        and     esi,r15d
DB      143,232,120,194,248,13
        xor     r14d,r8d
        add     edx,r13d
        vpxor   xmm4,xmm4,xmm6
        xor     esi,r9d
        add     r11d,edx
        vpsrld  xmm6,xmm0,10
        ror     r14d,2
        add     edx,esi
        vpaddd  xmm1,xmm1,xmm4
        mov     r13d,r11d
        add     r14d,edx
DB      143,232,120,194,239,2
        ror     r13d,14
        mov     edx,r14d
        vpxor   xmm7,xmm7,xmm6
        mov     r12d,eax
        xor     r13d,r11d
        ror     r14d,9
        xor     r12d,ebx
        vpxor   xmm7,xmm7,xmm5
        ror     r13d,5
        xor     r14d,edx
        and     r12d,r11d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((80-128))+rdi]
        xor     r13d,r11d
        vpsrldq xmm7,xmm7,8
        add     ecx,DWORD[20+rsp]
        mov     esi,edx
        ror     r14d,11
        xor     r12d,ebx
        vpaddd  xmm1,xmm1,xmm7
        xor     esi,r8d
        ror     r13d,6
        add     ecx,r12d
        and     r15d,esi
DB      143,232,120,194,249,13
        xor     r14d,edx
        add     ecx,r13d
        vpsrld  xmm6,xmm1,10
        xor     r15d,r8d
        add     r10d,ecx
DB      143,232,120,194,239,2
        ror     r14d,2
        add     ecx,r15d
        vpxor   xmm7,xmm7,xmm6
        mov     r13d,r10d
        add     r14d,ecx
        ror     r13d,14
        mov     ecx,r14d
        vpxor   xmm7,xmm7,xmm5
        mov     r12d,r11d
        xor     r13d,r10d
        ror     r14d,9
        xor     r12d,eax
        vpslldq xmm7,xmm7,8
        ror     r13d,5
        xor     r14d,ecx
        and     r12d,r10d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((96-128))+rdi]
        xor     r13d,r10d
        vpaddd  xmm1,xmm1,xmm7
        add     ebx,DWORD[24+rsp]
        mov     r15d,ecx
        ror     r14d,11
        xor     r12d,eax
        vpaddd  xmm6,xmm1,XMMWORD[32+rbp]
        xor     r15d,edx
        ror     r13d,6
        add     ebx,r12d
        and     esi,r15d
        xor     r14d,ecx
        add     ebx,r13d
        xor     esi,edx
        add     r9d,ebx
        ror     r14d,2
        add     ebx,esi
        mov     r13d,r9d
        add     r14d,ebx
        ror     r13d,14
        mov     ebx,r14d
        mov     r12d,r10d
        xor     r13d,r9d
        ror     r14d,9
        xor     r12d,r11d
        ror     r13d,5
        xor     r14d,ebx
        and     r12d,r9d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((112-128))+rdi]
        xor     r13d,r9d
        add     eax,DWORD[28+rsp]
        mov     esi,ebx
        ror     r14d,11
        xor     r12d,r11d
        xor     esi,ecx
        ror     r13d,6
        add     eax,r12d
        and     r15d,esi
        xor     r14d,ebx
        add     eax,r13d
        xor     r15d,ecx
        add     r8d,eax
        ror     r14d,2
        add     eax,r15d
        mov     r13d,r8d
        add     r14d,eax
        vmovdqa XMMWORD[16+rsp],xmm6
        vpalignr        xmm4,xmm3,xmm2,4
        ror     r13d,14
        mov     eax,r14d
        vpalignr        xmm7,xmm1,xmm0,4
        mov     r12d,r9d
        xor     r13d,r8d
DB      143,232,120,194,236,14
        ror     r14d,9
        xor     r12d,r10d
        vpsrld  xmm4,xmm4,3
        ror     r13d,5
        xor     r14d,eax
        vpaddd  xmm2,xmm2,xmm7
        and     r12d,r8d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((128-128))+rdi]
        xor     r13d,r8d
        add     r11d,DWORD[32+rsp]
        mov     r15d,eax
DB      143,232,120,194,245,11
        ror     r14d,11
        xor     r12d,r10d
        vpxor   xmm4,xmm4,xmm5
        xor     r15d,ebx
        ror     r13d,6
        add     r11d,r12d
        and     esi,r15d
DB      143,232,120,194,249,13
        xor     r14d,eax
        add     r11d,r13d
        vpxor   xmm4,xmm4,xmm6
        xor     esi,ebx
        add     edx,r11d
        vpsrld  xmm6,xmm1,10
        ror     r14d,2
        add     r11d,esi
        vpaddd  xmm2,xmm2,xmm4
        mov     r13d,edx
        add     r14d,r11d
DB      143,232,120,194,239,2
        ror     r13d,14
        mov     r11d,r14d
        vpxor   xmm7,xmm7,xmm6
        mov     r12d,r8d
        xor     r13d,edx
        ror     r14d,9
        xor     r12d,r9d
        vpxor   xmm7,xmm7,xmm5
        ror     r13d,5
        xor     r14d,r11d
        and     r12d,edx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((144-128))+rdi]
        xor     r13d,edx
        vpsrldq xmm7,xmm7,8
        add     r10d,DWORD[36+rsp]
        mov     esi,r11d
        ror     r14d,11
        xor     r12d,r9d
        vpaddd  xmm2,xmm2,xmm7
        xor     esi,eax
        ror     r13d,6
        add     r10d,r12d
        and     r15d,esi
DB      143,232,120,194,250,13
        xor     r14d,r11d
        add     r10d,r13d
        vpsrld  xmm6,xmm2,10
        xor     r15d,eax
        add     ecx,r10d
DB      143,232,120,194,239,2
        ror     r14d,2
        add     r10d,r15d
        vpxor   xmm7,xmm7,xmm6
        mov     r13d,ecx
        add     r14d,r10d
        ror     r13d,14
        mov     r10d,r14d
        vpxor   xmm7,xmm7,xmm5
        mov     r12d,edx
        xor     r13d,ecx
        ror     r14d,9
        xor     r12d,r8d
        vpslldq xmm7,xmm7,8
        ror     r13d,5
        xor     r14d,r10d
        and     r12d,ecx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((160-128))+rdi]
        xor     r13d,ecx
        vpaddd  xmm2,xmm2,xmm7
        add     r9d,DWORD[40+rsp]
        mov     r15d,r10d
        ror     r14d,11
        xor     r12d,r8d
        vpaddd  xmm6,xmm2,XMMWORD[64+rbp]
        xor     r15d,r11d
        ror     r13d,6
        add     r9d,r12d
        and     esi,r15d
        xor     r14d,r10d
        add     r9d,r13d
        xor     esi,r11d
        add     ebx,r9d
        ror     r14d,2
        add     r9d,esi
        mov     r13d,ebx
        add     r14d,r9d
        ror     r13d,14
        mov     r9d,r14d
        mov     r12d,ecx
        xor     r13d,ebx
        ror     r14d,9
        xor     r12d,edx
        ror     r13d,5
        xor     r14d,r9d
        and     r12d,ebx
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((176-128))+rdi]
        xor     r13d,ebx
        add     r8d,DWORD[44+rsp]
        mov     esi,r9d
        ror     r14d,11
        xor     r12d,edx
        xor     esi,r10d
        ror     r13d,6
        add     r8d,r12d
        and     r15d,esi
        xor     r14d,r9d
        add     r8d,r13d
        xor     r15d,r10d
        add     eax,r8d
        ror     r14d,2
        add     r8d,r15d
        mov     r13d,eax
        add     r14d,r8d
        vmovdqa XMMWORD[32+rsp],xmm6
        vpalignr        xmm4,xmm0,xmm3,4
        ror     r13d,14
        mov     r8d,r14d
        vpalignr        xmm7,xmm2,xmm1,4
        mov     r12d,ebx
        xor     r13d,eax
DB      143,232,120,194,236,14
        ror     r14d,9
        xor     r12d,ecx
        vpsrld  xmm4,xmm4,3
        ror     r13d,5
        xor     r14d,r8d
        vpaddd  xmm3,xmm3,xmm7
        and     r12d,eax
        vpand   xmm8,xmm11,xmm12
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((192-128))+rdi]
        xor     r13d,eax
        add     edx,DWORD[48+rsp]
        mov     r15d,r8d
DB      143,232,120,194,245,11
        ror     r14d,11
        xor     r12d,ecx
        vpxor   xmm4,xmm4,xmm5
        xor     r15d,r9d
        ror     r13d,6
        add     edx,r12d
        and     esi,r15d
DB      143,232,120,194,250,13
        xor     r14d,r8d
        add     edx,r13d
        vpxor   xmm4,xmm4,xmm6
        xor     esi,r9d
        add     r11d,edx
        vpsrld  xmm6,xmm2,10
        ror     r14d,2
        add     edx,esi
        vpaddd  xmm3,xmm3,xmm4
        mov     r13d,r11d
        add     r14d,edx
DB      143,232,120,194,239,2
        ror     r13d,14
        mov     edx,r14d
        vpxor   xmm7,xmm7,xmm6
        mov     r12d,eax
        xor     r13d,r11d
        ror     r14d,9
        xor     r12d,ebx
        vpxor   xmm7,xmm7,xmm5
        ror     r13d,5
        xor     r14d,edx
        and     r12d,r11d
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((208-128))+rdi]
        xor     r13d,r11d
        vpsrldq xmm7,xmm7,8
        add     ecx,DWORD[52+rsp]
        mov     esi,edx
        ror     r14d,11
        xor     r12d,ebx
        vpaddd  xmm3,xmm3,xmm7
        xor     esi,r8d
        ror     r13d,6
        add     ecx,r12d
        and     r15d,esi
DB      143,232,120,194,251,13
        xor     r14d,edx
        add     ecx,r13d
        vpsrld  xmm6,xmm3,10
        xor     r15d,r8d
        add     r10d,ecx
DB      143,232,120,194,239,2
        ror     r14d,2
        add     ecx,r15d
        vpxor   xmm7,xmm7,xmm6
        mov     r13d,r10d
        add     r14d,ecx
        ror     r13d,14
        mov     ecx,r14d
        vpxor   xmm7,xmm7,xmm5
        mov     r12d,r11d
        xor     r13d,r10d
        ror     r14d,9
        xor     r12d,eax
        vpslldq xmm7,xmm7,8
        ror     r13d,5
        xor     r14d,ecx
        and     r12d,r10d
        vpand   xmm11,xmm11,xmm13
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((224-128))+rdi]
        xor     r13d,r10d
        vpaddd  xmm3,xmm3,xmm7
        add     ebx,DWORD[56+rsp]
        mov     r15d,ecx
        ror     r14d,11
        xor     r12d,eax
        vpaddd  xmm6,xmm3,XMMWORD[96+rbp]
        xor     r15d,edx
        ror     r13d,6
        add     ebx,r12d
        and     esi,r15d
        xor     r14d,ecx
        add     ebx,r13d
        xor     esi,edx
        add     r9d,ebx
        ror     r14d,2
        add     ebx,esi
        mov     r13d,r9d
        add     r14d,ebx
        ror     r13d,14
        mov     ebx,r14d
        mov     r12d,r10d
        xor     r13d,r9d
        ror     r14d,9
        xor     r12d,r11d
        ror     r13d,5
        xor     r14d,ebx
        and     r12d,r9d
        vpor    xmm8,xmm8,xmm11
        vaesenclast     xmm11,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((0-128))+rdi]
        xor     r13d,r9d
        add     eax,DWORD[60+rsp]
        mov     esi,ebx
        ror     r14d,11
        xor     r12d,r11d
        xor     esi,ecx
        ror     r13d,6
        add     eax,r12d
        and     r15d,esi
        xor     r14d,ebx
        add     eax,r13d
        xor     r15d,ecx
        add     r8d,eax
        ror     r14d,2
        add     eax,r15d
        mov     r13d,r8d
        add     r14d,eax
        vmovdqa XMMWORD[48+rsp],xmm6
        mov     r12,QWORD[((64+0))+rsp]
        vpand   xmm11,xmm11,xmm14
        mov     r15,QWORD[((64+8))+rsp]
        vpor    xmm8,xmm8,xmm11
        vmovdqu XMMWORD[r12*1+r15],xmm8
        lea     r12,[16+r12]
        cmp     BYTE[131+rbp],0
        jne     NEAR $L$xop_00_47
        vmovdqu xmm9,XMMWORD[r12]
        mov     QWORD[((64+0))+rsp],r12
        ror     r13d,14
        mov     eax,r14d
        mov     r12d,r9d
        xor     r13d,r8d
        ror     r14d,9
        xor     r12d,r10d
        ror     r13d,5
        xor     r14d,eax
        and     r12d,r8d
        vpxor   xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((16-128))+rdi]
        xor     r13d,r8d
        add     r11d,DWORD[rsp]
        mov     r15d,eax
        ror     r14d,11
        xor     r12d,r10d
        xor     r15d,ebx
        ror     r13d,6
        add     r11d,r12d
        and     esi,r15d
        xor     r14d,eax
        add     r11d,r13d
        xor     esi,ebx
        add     edx,r11d
        ror     r14d,2
        add     r11d,esi
        mov     r13d,edx
        add     r14d,r11d
        ror     r13d,14
        mov     r11d,r14d
        mov     r12d,r8d
        xor     r13d,edx
        ror     r14d,9
        xor     r12d,r9d
        ror     r13d,5
        xor     r14d,r11d
        and     r12d,edx
        vpxor   xmm9,xmm9,xmm8
        xor     r13d,edx
        add     r10d,DWORD[4+rsp]
        mov     esi,r11d
        ror     r14d,11
        xor     r12d,r9d
        xor     esi,eax
        ror     r13d,6
        add     r10d,r12d
        and     r15d,esi
        xor     r14d,r11d
        add     r10d,r13d
        xor     r15d,eax
        add     ecx,r10d
        ror     r14d,2
        add     r10d,r15d
        mov     r13d,ecx
        add     r14d,r10d
        ror     r13d,14
        mov     r10d,r14d
        mov     r12d,edx
        xor     r13d,ecx
        ror     r14d,9
        xor     r12d,r8d
        ror     r13d,5
        xor     r14d,r10d
        and     r12d,ecx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((32-128))+rdi]
        xor     r13d,ecx
        add     r9d,DWORD[8+rsp]
        mov     r15d,r10d
        ror     r14d,11
        xor     r12d,r8d
        xor     r15d,r11d
        ror     r13d,6
        add     r9d,r12d
        and     esi,r15d
        xor     r14d,r10d
        add     r9d,r13d
        xor     esi,r11d
        add     ebx,r9d
        ror     r14d,2
        add     r9d,esi
        mov     r13d,ebx
        add     r14d,r9d
        ror     r13d,14
        mov     r9d,r14d
        mov     r12d,ecx
        xor     r13d,ebx
        ror     r14d,9
        xor     r12d,edx
        ror     r13d,5
        xor     r14d,r9d
        and     r12d,ebx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((48-128))+rdi]
        xor     r13d,ebx
        add     r8d,DWORD[12+rsp]
        mov     esi,r9d
        ror     r14d,11
        xor     r12d,edx
        xor     esi,r10d
        ror     r13d,6
        add     r8d,r12d
        and     r15d,esi
        xor     r14d,r9d
        add     r8d,r13d
        xor     r15d,r10d
        add     eax,r8d
        ror     r14d,2
        add     r8d,r15d
        mov     r13d,eax
        add     r14d,r8d
        ror     r13d,14
        mov     r8d,r14d
        mov     r12d,ebx
        xor     r13d,eax
        ror     r14d,9
        xor     r12d,ecx
        ror     r13d,5
        xor     r14d,r8d
        and     r12d,eax
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((64-128))+rdi]
        xor     r13d,eax
        add     edx,DWORD[16+rsp]
        mov     r15d,r8d
        ror     r14d,11
        xor     r12d,ecx
        xor     r15d,r9d
        ror     r13d,6
        add     edx,r12d
        and     esi,r15d
        xor     r14d,r8d
        add     edx,r13d
        xor     esi,r9d
        add     r11d,edx
        ror     r14d,2
        add     edx,esi
        mov     r13d,r11d
        add     r14d,edx
        ror     r13d,14
        mov     edx,r14d
        mov     r12d,eax
        xor     r13d,r11d
        ror     r14d,9
        xor     r12d,ebx
        ror     r13d,5
        xor     r14d,edx
        and     r12d,r11d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((80-128))+rdi]
        xor     r13d,r11d
        add     ecx,DWORD[20+rsp]
        mov     esi,edx
        ror     r14d,11
        xor     r12d,ebx
        xor     esi,r8d
        ror     r13d,6
        add     ecx,r12d
        and     r15d,esi
        xor     r14d,edx
        add     ecx,r13d
        xor     r15d,r8d
        add     r10d,ecx
        ror     r14d,2
        add     ecx,r15d
        mov     r13d,r10d
        add     r14d,ecx
        ror     r13d,14
        mov     ecx,r14d
        mov     r12d,r11d
        xor     r13d,r10d
        ror     r14d,9
        xor     r12d,eax
        ror     r13d,5
        xor     r14d,ecx
        and     r12d,r10d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((96-128))+rdi]
        xor     r13d,r10d
        add     ebx,DWORD[24+rsp]
        mov     r15d,ecx
        ror     r14d,11
        xor     r12d,eax
        xor     r15d,edx
        ror     r13d,6
        add     ebx,r12d
        and     esi,r15d
        xor     r14d,ecx
        add     ebx,r13d
        xor     esi,edx
        add     r9d,ebx
        ror     r14d,2
        add     ebx,esi
        mov     r13d,r9d
        add     r14d,ebx
        ror     r13d,14
        mov     ebx,r14d
        mov     r12d,r10d
        xor     r13d,r9d
        ror     r14d,9
        xor     r12d,r11d
        ror     r13d,5
        xor     r14d,ebx
        and     r12d,r9d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((112-128))+rdi]
        xor     r13d,r9d
        add     eax,DWORD[28+rsp]
        mov     esi,ebx
        ror     r14d,11
        xor     r12d,r11d
        xor     esi,ecx
        ror     r13d,6
        add     eax,r12d
        and     r15d,esi
        xor     r14d,ebx
        add     eax,r13d
        xor     r15d,ecx
        add     r8d,eax
        ror     r14d,2
        add     eax,r15d
        mov     r13d,r8d
        add     r14d,eax
        ror     r13d,14
        mov     eax,r14d
        mov     r12d,r9d
        xor     r13d,r8d
        ror     r14d,9
        xor     r12d,r10d
        ror     r13d,5
        xor     r14d,eax
        and     r12d,r8d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((128-128))+rdi]
        xor     r13d,r8d
        add     r11d,DWORD[32+rsp]
        mov     r15d,eax
        ror     r14d,11
        xor     r12d,r10d
        xor     r15d,ebx
        ror     r13d,6
        add     r11d,r12d
        and     esi,r15d
        xor     r14d,eax
        add     r11d,r13d
        xor     esi,ebx
        add     edx,r11d
        ror     r14d,2
        add     r11d,esi
        mov     r13d,edx
        add     r14d,r11d
        ror     r13d,14
        mov     r11d,r14d
        mov     r12d,r8d
        xor     r13d,edx
        ror     r14d,9
        xor     r12d,r9d
        ror     r13d,5
        xor     r14d,r11d
        and     r12d,edx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((144-128))+rdi]
        xor     r13d,edx
        add     r10d,DWORD[36+rsp]
        mov     esi,r11d
        ror     r14d,11
        xor     r12d,r9d
        xor     esi,eax
        ror     r13d,6
        add     r10d,r12d
        and     r15d,esi
        xor     r14d,r11d
        add     r10d,r13d
        xor     r15d,eax
        add     ecx,r10d
        ror     r14d,2
        add     r10d,r15d
        mov     r13d,ecx
        add     r14d,r10d
        ror     r13d,14
        mov     r10d,r14d
        mov     r12d,edx
        xor     r13d,ecx
        ror     r14d,9
        xor     r12d,r8d
        ror     r13d,5
        xor     r14d,r10d
        and     r12d,ecx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((160-128))+rdi]
        xor     r13d,ecx
        add     r9d,DWORD[40+rsp]
        mov     r15d,r10d
        ror     r14d,11
        xor     r12d,r8d
        xor     r15d,r11d
        ror     r13d,6
        add     r9d,r12d
        and     esi,r15d
        xor     r14d,r10d
        add     r9d,r13d
        xor     esi,r11d
        add     ebx,r9d
        ror     r14d,2
        add     r9d,esi
        mov     r13d,ebx
        add     r14d,r9d
        ror     r13d,14
        mov     r9d,r14d
        mov     r12d,ecx
        xor     r13d,ebx
        ror     r14d,9
        xor     r12d,edx
        ror     r13d,5
        xor     r14d,r9d
        and     r12d,ebx
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((176-128))+rdi]
        xor     r13d,ebx
        add     r8d,DWORD[44+rsp]
        mov     esi,r9d
        ror     r14d,11
        xor     r12d,edx
        xor     esi,r10d
        ror     r13d,6
        add     r8d,r12d
        and     r15d,esi
        xor     r14d,r9d
        add     r8d,r13d
        xor     r15d,r10d
        add     eax,r8d
        ror     r14d,2
        add     r8d,r15d
        mov     r13d,eax
        add     r14d,r8d
        ror     r13d,14
        mov     r8d,r14d
        mov     r12d,ebx
        xor     r13d,eax
        ror     r14d,9
        xor     r12d,ecx
        ror     r13d,5
        xor     r14d,r8d
        and     r12d,eax
        vpand   xmm8,xmm11,xmm12
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((192-128))+rdi]
        xor     r13d,eax
        add     edx,DWORD[48+rsp]
        mov     r15d,r8d
        ror     r14d,11
        xor     r12d,ecx
        xor     r15d,r9d
        ror     r13d,6
        add     edx,r12d
        and     esi,r15d
        xor     r14d,r8d
        add     edx,r13d
        xor     esi,r9d
        add     r11d,edx
        ror     r14d,2
        add     edx,esi
        mov     r13d,r11d
        add     r14d,edx
        ror     r13d,14
        mov     edx,r14d
        mov     r12d,eax
        xor     r13d,r11d
        ror     r14d,9
        xor     r12d,ebx
        ror     r13d,5
        xor     r14d,edx
        and     r12d,r11d
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((208-128))+rdi]
        xor     r13d,r11d
        add     ecx,DWORD[52+rsp]
        mov     esi,edx
        ror     r14d,11
        xor     r12d,ebx
        xor     esi,r8d
        ror     r13d,6
        add     ecx,r12d
        and     r15d,esi
        xor     r14d,edx
        add     ecx,r13d
        xor     r15d,r8d
        add     r10d,ecx
        ror     r14d,2
        add     ecx,r15d
        mov     r13d,r10d
        add     r14d,ecx
        ror     r13d,14
        mov     ecx,r14d
        mov     r12d,r11d
        xor     r13d,r10d
        ror     r14d,9
        xor     r12d,eax
        ror     r13d,5
        xor     r14d,ecx
        and     r12d,r10d
        vpand   xmm11,xmm11,xmm13
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((224-128))+rdi]
        xor     r13d,r10d
        add     ebx,DWORD[56+rsp]
        mov     r15d,ecx
        ror     r14d,11
        xor     r12d,eax
        xor     r15d,edx
        ror     r13d,6
        add     ebx,r12d
        and     esi,r15d
        xor     r14d,ecx
        add     ebx,r13d
        xor     esi,edx
        add     r9d,ebx
        ror     r14d,2
        add     ebx,esi
        mov     r13d,r9d
        add     r14d,ebx
        ror     r13d,14
        mov     ebx,r14d
        mov     r12d,r10d
        xor     r13d,r9d
        ror     r14d,9
        xor     r12d,r11d
        ror     r13d,5
        xor     r14d,ebx
        and     r12d,r9d
        vpor    xmm8,xmm8,xmm11
        vaesenclast     xmm11,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((0-128))+rdi]
        xor     r13d,r9d
        add     eax,DWORD[60+rsp]
        mov     esi,ebx
        ror     r14d,11
        xor     r12d,r11d
        xor     esi,ecx
        ror     r13d,6
        add     eax,r12d
        and     r15d,esi
        xor     r14d,ebx
        add     eax,r13d
        xor     r15d,ecx
        add     r8d,eax
        ror     r14d,2
        add     eax,r15d
        mov     r13d,r8d
        add     r14d,eax
        ; Tail of one pass of the XOP round code: r12, r13, r15 and rsi were
        ; used as scratch by the rounds above, so reload them from the
        ; stack frame.
        ; NOTE(review): the XOP prologue is not visible here; presumably the
        ; slots mean the same as in the AVX variant (64+0 = current input
        ; position, 64+8 = output-minus-input offset, 64+40 = hash state
        ; pointer, 64+48 = second input offset) -- confirm.
        mov     r12,QWORD[((64+0))+rsp]
        mov     r13,QWORD[((64+8))+rsp]
        mov     r15,QWORD[((64+40))+rsp]
        mov     rsi,QWORD[((64+48))+rsp]

        ; xmm11 is the third vaesenclast candidate (round key loaded from
        ; offset 224).  Keep it only where mask xmm14 is set and OR it into
        ; xmm8, which already merges the candidates masked by xmm12 and
        ; xmm13.  Presumably exactly one mask is all-ones, chosen by the
        ; AES key length -- confirm against the prologue.
        vpand   xmm11,xmm11,xmm14
        ; eax = r14d: completes the deferred update of the last round (same
        ; pattern as the "mov r9d,r14d" style moves that open each round).
        mov     eax,r14d
        vpor    xmm8,xmm8,xmm11
        ; Store the merged 16-byte result at [r12 + r13], then advance r12
        ; by 16 bytes.
        vmovdqu XMMWORD[r13*1+r12],xmm8
        lea     r12,[16+r12]

        ; Add the eight 32-bit words stored at [r15] into the eight working
        ; registers eax, ebx, ecx, edx, r8d-r11d.
        add     eax,DWORD[r15]
        add     ebx,DWORD[4+r15]
        add     ecx,DWORD[8+r15]
        add     edx,DWORD[12+r15]
        add     r8d,DWORD[16+r15]
        add     r9d,DWORD[20+r15]
        add     r10d,DWORD[24+r15]
        add     r11d,DWORD[28+r15]

        ; Compare the advanced position with the limit kept in slot 64+16.
        ; The flags are consumed by the jb below; the mov stores in between
        ; do not modify flags.
        cmp     r12,QWORD[((64+16))+rsp]

        ; Write the updated eight words back to [r15].
        mov     DWORD[r15],eax
        mov     DWORD[4+r15],ebx
        mov     DWORD[8+r15],ecx
        mov     DWORD[12+r15],edx
        mov     DWORD[16+r15],r8d
        mov     DWORD[20+r15],r9d
        mov     DWORD[24+r15],r10d
        mov     DWORD[28+r15],r11d

        ; Unsigned compare: loop again while r12 is below the limit.
        jb      NEAR $L$loop_xop

        ; Loop finished.  r8 = pointer kept in slot 64+32, rsi = value kept
        ; in slot 120.
        ; NOTE(review): presumably slot 120 is the stack pointer captured at
        ; entry before the register pushes, as in the AVX variant -- confirm.
        mov     r8,QWORD[((64+32))+rsp]
        mov     rsi,QWORD[120+rsp]

        ; Write the last merged block (xmm8) to [r8], then clear every
        ; vector register before the saved xmm6-xmm15 are reloaded.
        vmovdqu XMMWORD[r8],xmm8
        vzeroall
        ; Restore xmm6-xmm15 (callee-saved under the Win64 ABI) from the
        ; frame at rsp+128 .. rsp+287.
        movaps  xmm6,XMMWORD[128+rsp]
        movaps  xmm7,XMMWORD[144+rsp]
        movaps  xmm8,XMMWORD[160+rsp]
        movaps  xmm9,XMMWORD[176+rsp]
        movaps  xmm10,XMMWORD[192+rsp]
        movaps  xmm11,XMMWORD[208+rsp]
        movaps  xmm12,XMMWORD[224+rsp]
        movaps  xmm13,XMMWORD[240+rsp]
        movaps  xmm14,XMMWORD[256+rsp]
        movaps  xmm15,XMMWORD[272+rsp]
        ; Reload r15, r14, r13, r12, rbp, rbx from the six quadwords just
        ; below the address in rsi, then set rsp = rsi.
        mov     r15,QWORD[((-48))+rsi]

        mov     r14,QWORD[((-40))+rsi]

        mov     r13,QWORD[((-32))+rsi]

        mov     r12,QWORD[((-24))+rsi]

        mov     rbp,QWORD[((-16))+rsi]

        mov     rbx,QWORD[((-8))+rsi]

        lea     rsp,[rsi]

$L$epilogue_xop:
        ; Reload rdi/rsi from the [8+rsp] and [16+rsp] slots and return
        ; (the DB bytes encode "rep ret").
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret

$L$SEH_end_aesni_cbc_sha256_enc_xop:

ALIGN   64
;-----------------------------------------------------------------------
; aesni_cbc_sha256_enc_avx -- AVX code path.
; The dispatcher aesni_cbc_sha256_enc jumps here (jnz) when bit 28 of the
; capability dword it tests is set.
;
; ABI: Win64.  Arguments arrive in rcx, rdx, r8, r9 and at [40+rsp],
; [48+rsp], [56+rsp].  As used by the code below:
;   arg1 (rcx)      base pointer; becomes r12, the running position
;   arg2 (rdx)      kept only as the difference (arg2 - arg1)
;   arg3 (r8)       count; the loop limit is arg1 + 64*arg3
;   arg4 (r9)       pointer; 16-byte values at arg4+0, +16, ... are fed to
;                   vaesenc, and the dword at arg4+240 is read
;   arg5 [40+rsp]   pointer to 16 bytes loaded into xmm8
;   arg6 [48+rsp]   pointer to eight dwords loaded into eax..r11d
;   arg7 [56+rsp]   kept only as the difference (arg7 - arg1)
; NOTE(review): presumably (inp, out, blocks, key, ivec, ctx, in0) --
; confirm against the C prototype.
;
; Stack frame after "and rsp,-64":
;   [rsp+0   .. +63 ]  16 dwords consumed by the rounds (filled in the loop)
;   [rsp+64+0]         running position (written inside $L$avx_00_47)
;   [rsp+64+8]         arg2 - arg1
;   [rsp+64+16]        arg1 + 64*arg3
;   [rsp+64+32]        arg5
;   [rsp+64+40]        arg6
;   [rsp+64+48]        arg7 - arg1
;   [rsp+120]          rsp as it was before the six pushes
;   [rsp+128 .. +287]  saved xmm6-xmm15
;-----------------------------------------------------------------------
aesni_cbc_sha256_enc_avx:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_aesni_cbc_sha256_enc_avx:
        ; Move the Win64 arguments into rdi, rsi, rdx, rcx, r8, r9 (the
        ; System V argument registers), which the rest of the body uses.
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8
        mov     rcx,r9
        mov     r8,QWORD[40+rsp]
        mov     r9,QWORD[48+rsp]



$L$avx_shortcut:
        ; r10 = seventh argument.  rax = rsp again (same value as above,
        ; nothing has been pushed since); it is stored in the frame below.
        mov     r10,QWORD[56+rsp]
        mov     rax,rsp

        ; Save the callee-saved general-purpose registers.
        push    rbx

        push    rbp

        push    r12

        push    r13

        push    r14

        push    r15

        ; Reserve 288 bytes and round rsp down to a 64-byte boundary.
        sub     rsp,288
        and     rsp,-64

        ; rdx = arg1 + 64*arg3 (loop limit); rsi and r10 become offsets
        ; relative to arg1.
        shl     rdx,6
        sub     rsi,rdi
        sub     r10,rdi
        add     rdx,rdi


        ; Spill the loop bookkeeping; these registers are reused as scratch
        ; by the round code.
        mov     QWORD[((64+8))+rsp],rsi
        mov     QWORD[((64+16))+rsp],rdx

        mov     QWORD[((64+32))+rsp],r8
        mov     QWORD[((64+40))+rsp],r9
        mov     QWORD[((64+48))+rsp],r10
        mov     QWORD[120+rsp],rax

        ; Save xmm6-xmm15 (callee-saved under the Win64 ABI).  movaps needs
        ; 16-byte alignment, which the 64-byte aligned rsp provides.
        movaps  XMMWORD[128+rsp],xmm6
        movaps  XMMWORD[144+rsp],xmm7
        movaps  XMMWORD[160+rsp],xmm8
        movaps  XMMWORD[176+rsp],xmm9
        movaps  XMMWORD[192+rsp],xmm10
        movaps  XMMWORD[208+rsp],xmm11
        movaps  XMMWORD[224+rsp],xmm12
        movaps  XMMWORD[240+rsp],xmm13
        movaps  XMMWORD[256+rsp],xmm14
        movaps  XMMWORD[272+rsp],xmm15
$L$prologue_avx:
        ; Clear all vector registers before use.
        vzeroall

        ; r12 = running position (arg1).  rdi = arg4+128, so the later
        ; "(N-128)+rdi" operands address arg4+N.
        mov     r12,rdi
        lea     rdi,[128+rcx]
        ; r13 = base of a table located at K256+544, indexed below.
        lea     r13,[((K256+544))]
        ; r14d = dword at arg4+240 (presumably the AES round count -- confirm).
        mov     r14d,DWORD[((240-128))+rdi]
        mov     r15,r9
        mov     rsi,r10
        ; xmm8 = the 16 bytes at arg5; it is XORed into the first block
        ; inside the round loop.
        vmovdqu xmm8,XMMWORD[r8]
        ; r14 = (dword at arg4+240) - 9, used as the table index below.
        sub     r14,9

        ; Load the eight 32-bit state words from arg6 into the working
        ; registers.
        mov     eax,DWORD[r15]
        mov     ebx,DWORD[4+r15]
        mov     ecx,DWORD[8+r15]
        mov     edx,DWORD[12+r15]
        mov     r8d,DWORD[16+r15]
        mov     r9d,DWORD[20+r15]
        mov     r10d,DWORD[24+r15]
        mov     r11d,DWORD[28+r15]

        ; xmm14/xmm13/xmm12 = three consecutive 16-byte values at
        ; r13 + r14*8.  The round loop ANDs them with the three vaesenclast
        ; candidates and ORs the results together, so they act as
        ; selection masks.
        vmovdqa xmm14,XMMWORD[r14*8+r13]
        vmovdqa xmm13,XMMWORD[16+r14*8+r13]
        vmovdqa xmm12,XMMWORD[32+r14*8+r13]
        ; xmm10 = first 16 bytes at arg4 (first value fed to the AES rounds).
        vmovdqu xmm10,XMMWORD[((0-128))+rdi]
        jmp     NEAR $L$loop_avx
ALIGN   16
; Per-block setup for the AVX path: load 64 bytes, pre-add the first table
; constants, and prime the scalar round temporaries.
$L$loop_avx:
        ; xmm7 = 16-byte vpshufb control stored at K256+512
        ; (presumably a big-endian byte-swap mask -- confirm).
        vmovdqa xmm7,XMMWORD[((K256+512))]
        ; Load 64 bytes from [r12 + rsi] into xmm0-xmm3 (unaligned loads).
        vmovdqu xmm0,XMMWORD[r12*1+rsi]
        vmovdqu xmm1,XMMWORD[16+r12*1+rsi]
        vmovdqu xmm2,XMMWORD[32+r12*1+rsi]
        vmovdqu xmm3,XMMWORD[48+r12*1+rsi]
        vpshufb xmm0,xmm0,xmm7
        ; rbp = start of the K256 table; $L$avx_00_47 advances it.
        lea     rbp,[K256]
        vpshufb xmm1,xmm1,xmm7
        vpshufb xmm2,xmm2,xmm7
        ; xmm4-xmm7 = shuffled input + table rows.  The stride is 32 bytes
        ; because every 16-byte row of K256 is stored twice.
        vpaddd  xmm4,xmm0,XMMWORD[rbp]
        vpshufb xmm3,xmm3,xmm7
        vpaddd  xmm5,xmm1,XMMWORD[32+rbp]
        vpaddd  xmm6,xmm2,XMMWORD[64+rbp]
        vpaddd  xmm7,xmm3,XMMWORD[96+rbp]
        ; Park the 16 sums at [rsp+0 .. rsp+63]; the scalar rounds read them
        ; with "add ...,DWORD[N+rsp]".  Interleaved with that, prime the
        ; round temporaries: r14d = eax, esi = ebx ^ ecx, r13d = r8d.
        vmovdqa XMMWORD[rsp],xmm4
        mov     r14d,eax
        vmovdqa XMMWORD[16+rsp],xmm5
        mov     esi,ebx
        vmovdqa XMMWORD[32+rsp],xmm6
        xor     esi,ecx
        vmovdqa XMMWORD[48+rsp],xmm7
        mov     r13d,r8d
        ; Enter the round loop.
        jmp     NEAR $L$avx_00_47

ALIGN   16
$L$avx_00_47:
        sub     rbp,-16*2*4
        vmovdqu xmm9,XMMWORD[r12]
        mov     QWORD[((64+0))+rsp],r12
        vpalignr        xmm4,xmm1,xmm0,4
        shrd    r13d,r13d,14
        mov     eax,r14d
        mov     r12d,r9d
        vpalignr        xmm7,xmm3,xmm2,4
        xor     r13d,r8d
        shrd    r14d,r14d,9
        xor     r12d,r10d
        vpsrld  xmm6,xmm4,7
        shrd    r13d,r13d,5
        xor     r14d,eax
        and     r12d,r8d
        vpaddd  xmm0,xmm0,xmm7
        vpxor   xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((16-128))+rdi]
        xor     r13d,r8d
        add     r11d,DWORD[rsp]
        mov     r15d,eax
        vpsrld  xmm7,xmm4,3
        shrd    r14d,r14d,11
        xor     r12d,r10d
        xor     r15d,ebx
        vpslld  xmm5,xmm4,14
        shrd    r13d,r13d,6
        add     r11d,r12d
        and     esi,r15d
        vpxor   xmm4,xmm7,xmm6
        xor     r14d,eax
        add     r11d,r13d
        xor     esi,ebx
        vpshufd xmm7,xmm3,250
        add     edx,r11d
        shrd    r14d,r14d,2
        add     r11d,esi
        vpsrld  xmm6,xmm6,11
        mov     r13d,edx
        add     r14d,r11d
        shrd    r13d,r13d,14
        vpxor   xmm4,xmm4,xmm5
        mov     r11d,r14d
        mov     r12d,r8d
        xor     r13d,edx
        vpslld  xmm5,xmm5,11
        shrd    r14d,r14d,9
        xor     r12d,r9d
        shrd    r13d,r13d,5
        vpxor   xmm4,xmm4,xmm6
        xor     r14d,r11d
        and     r12d,edx
        vpxor   xmm9,xmm9,xmm8
        xor     r13d,edx
        vpsrld  xmm6,xmm7,10
        add     r10d,DWORD[4+rsp]
        mov     esi,r11d
        shrd    r14d,r14d,11
        vpxor   xmm4,xmm4,xmm5
        xor     r12d,r9d
        xor     esi,eax
        shrd    r13d,r13d,6
        vpsrlq  xmm7,xmm7,17
        add     r10d,r12d
        and     r15d,esi
        xor     r14d,r11d
        vpaddd  xmm0,xmm0,xmm4
        add     r10d,r13d
        xor     r15d,eax
        add     ecx,r10d
        vpxor   xmm6,xmm6,xmm7
        shrd    r14d,r14d,2
        add     r10d,r15d
        mov     r13d,ecx
        vpsrlq  xmm7,xmm7,2
        add     r14d,r10d
        shrd    r13d,r13d,14
        mov     r10d,r14d
        vpxor   xmm6,xmm6,xmm7
        mov     r12d,edx
        xor     r13d,ecx
        shrd    r14d,r14d,9
        vpshufd xmm6,xmm6,132
        xor     r12d,r8d
        shrd    r13d,r13d,5
        xor     r14d,r10d
        vpsrldq xmm6,xmm6,8
        and     r12d,ecx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((32-128))+rdi]
        xor     r13d,ecx
        add     r9d,DWORD[8+rsp]
        vpaddd  xmm0,xmm0,xmm6
        mov     r15d,r10d
        shrd    r14d,r14d,11
        xor     r12d,r8d
        vpshufd xmm7,xmm0,80
        xor     r15d,r11d
        shrd    r13d,r13d,6
        add     r9d,r12d
        vpsrld  xmm6,xmm7,10
        and     esi,r15d
        xor     r14d,r10d
        add     r9d,r13d
        vpsrlq  xmm7,xmm7,17
        xor     esi,r11d
        add     ebx,r9d
        shrd    r14d,r14d,2
        vpxor   xmm6,xmm6,xmm7
        add     r9d,esi
        mov     r13d,ebx
        add     r14d,r9d
        vpsrlq  xmm7,xmm7,2
        shrd    r13d,r13d,14
        mov     r9d,r14d
        mov     r12d,ecx
        vpxor   xmm6,xmm6,xmm7
        xor     r13d,ebx
        shrd    r14d,r14d,9
        xor     r12d,edx
        vpshufd xmm6,xmm6,232
        shrd    r13d,r13d,5
        xor     r14d,r9d
        and     r12d,ebx
        vpslldq xmm6,xmm6,8
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((48-128))+rdi]
        xor     r13d,ebx
        add     r8d,DWORD[12+rsp]
        mov     esi,r9d
        vpaddd  xmm0,xmm0,xmm6
        shrd    r14d,r14d,11
        xor     r12d,edx
        xor     esi,r10d
        vpaddd  xmm6,xmm0,XMMWORD[rbp]
        shrd    r13d,r13d,6
        add     r8d,r12d
        and     r15d,esi
        xor     r14d,r9d
        add     r8d,r13d
        xor     r15d,r10d
        add     eax,r8d
        shrd    r14d,r14d,2
        add     r8d,r15d
        mov     r13d,eax
        add     r14d,r8d
        vmovdqa XMMWORD[rsp],xmm6
        vpalignr        xmm4,xmm2,xmm1,4
        shrd    r13d,r13d,14
        mov     r8d,r14d
        mov     r12d,ebx
        vpalignr        xmm7,xmm0,xmm3,4
        xor     r13d,eax
        shrd    r14d,r14d,9
        xor     r12d,ecx
        vpsrld  xmm6,xmm4,7
        shrd    r13d,r13d,5
        xor     r14d,r8d
        and     r12d,eax
        vpaddd  xmm1,xmm1,xmm7
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((64-128))+rdi]
        xor     r13d,eax
        add     edx,DWORD[16+rsp]
        mov     r15d,r8d
        vpsrld  xmm7,xmm4,3
        shrd    r14d,r14d,11
        xor     r12d,ecx
        xor     r15d,r9d
        vpslld  xmm5,xmm4,14
        shrd    r13d,r13d,6
        add     edx,r12d
        and     esi,r15d
        vpxor   xmm4,xmm7,xmm6
        xor     r14d,r8d
        add     edx,r13d
        xor     esi,r9d
        vpshufd xmm7,xmm0,250
        add     r11d,edx
        shrd    r14d,r14d,2
        add     edx,esi
        vpsrld  xmm6,xmm6,11
        mov     r13d,r11d
        add     r14d,edx
        shrd    r13d,r13d,14
        vpxor   xmm4,xmm4,xmm5
        mov     edx,r14d
        mov     r12d,eax
        xor     r13d,r11d
        vpslld  xmm5,xmm5,11
        shrd    r14d,r14d,9
        xor     r12d,ebx
        shrd    r13d,r13d,5
        vpxor   xmm4,xmm4,xmm6
        xor     r14d,edx
        and     r12d,r11d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((80-128))+rdi]
        xor     r13d,r11d
        vpsrld  xmm6,xmm7,10
        add     ecx,DWORD[20+rsp]
        mov     esi,edx
        shrd    r14d,r14d,11
        vpxor   xmm4,xmm4,xmm5
        xor     r12d,ebx
        xor     esi,r8d
        shrd    r13d,r13d,6
        vpsrlq  xmm7,xmm7,17
        add     ecx,r12d
        and     r15d,esi
        xor     r14d,edx
        vpaddd  xmm1,xmm1,xmm4
        add     ecx,r13d
        xor     r15d,r8d
        add     r10d,ecx
        vpxor   xmm6,xmm6,xmm7
        shrd    r14d,r14d,2
        add     ecx,r15d
        mov     r13d,r10d
        vpsrlq  xmm7,xmm7,2
        add     r14d,ecx
        shrd    r13d,r13d,14
        mov     ecx,r14d
        vpxor   xmm6,xmm6,xmm7
        mov     r12d,r11d
        xor     r13d,r10d
        shrd    r14d,r14d,9
        vpshufd xmm6,xmm6,132
        xor     r12d,eax
        shrd    r13d,r13d,5
        xor     r14d,ecx
        vpsrldq xmm6,xmm6,8
        and     r12d,r10d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((96-128))+rdi]
        xor     r13d,r10d
        add     ebx,DWORD[24+rsp]
        vpaddd  xmm1,xmm1,xmm6
        mov     r15d,ecx
        shrd    r14d,r14d,11
        xor     r12d,eax
        vpshufd xmm7,xmm1,80
        xor     r15d,edx
        shrd    r13d,r13d,6
        add     ebx,r12d
        vpsrld  xmm6,xmm7,10
        and     esi,r15d
        xor     r14d,ecx
        add     ebx,r13d
        vpsrlq  xmm7,xmm7,17
        xor     esi,edx
        add     r9d,ebx
        shrd    r14d,r14d,2
        vpxor   xmm6,xmm6,xmm7
        add     ebx,esi
        mov     r13d,r9d
        add     r14d,ebx
        vpsrlq  xmm7,xmm7,2
        shrd    r13d,r13d,14
        mov     ebx,r14d
        mov     r12d,r10d
        vpxor   xmm6,xmm6,xmm7
        xor     r13d,r9d
        shrd    r14d,r14d,9
        xor     r12d,r11d
        vpshufd xmm6,xmm6,232
        shrd    r13d,r13d,5
        xor     r14d,ebx
        and     r12d,r9d
        vpslldq xmm6,xmm6,8
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((112-128))+rdi]
        xor     r13d,r9d
        add     eax,DWORD[28+rsp]
        mov     esi,ebx
        vpaddd  xmm1,xmm1,xmm6
        shrd    r14d,r14d,11
        xor     r12d,r11d
        xor     esi,ecx
        vpaddd  xmm6,xmm1,XMMWORD[32+rbp]
        shrd    r13d,r13d,6
        add     eax,r12d
        and     r15d,esi
        xor     r14d,ebx
        add     eax,r13d
        xor     r15d,ecx
        add     r8d,eax
        shrd    r14d,r14d,2
        add     eax,r15d
        mov     r13d,r8d
        add     r14d,eax
        vmovdqa XMMWORD[16+rsp],xmm6
        vpalignr        xmm4,xmm3,xmm2,4
        shrd    r13d,r13d,14
        mov     eax,r14d
        mov     r12d,r9d
        vpalignr        xmm7,xmm1,xmm0,4
        xor     r13d,r8d
        shrd    r14d,r14d,9
        xor     r12d,r10d
        vpsrld  xmm6,xmm4,7
        shrd    r13d,r13d,5
        xor     r14d,eax
        and     r12d,r8d
        vpaddd  xmm2,xmm2,xmm7
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((128-128))+rdi]
        xor     r13d,r8d
        add     r11d,DWORD[32+rsp]
        mov     r15d,eax
        vpsrld  xmm7,xmm4,3
        shrd    r14d,r14d,11
        xor     r12d,r10d
        xor     r15d,ebx
        vpslld  xmm5,xmm4,14
        shrd    r13d,r13d,6
        add     r11d,r12d
        and     esi,r15d
        vpxor   xmm4,xmm7,xmm6
        xor     r14d,eax
        add     r11d,r13d
        xor     esi,ebx
        vpshufd xmm7,xmm1,250
        add     edx,r11d
        shrd    r14d,r14d,2
        add     r11d,esi
        vpsrld  xmm6,xmm6,11
        mov     r13d,edx
        add     r14d,r11d
        shrd    r13d,r13d,14
        vpxor   xmm4,xmm4,xmm5
        mov     r11d,r14d
        mov     r12d,r8d
        xor     r13d,edx
        vpslld  xmm5,xmm5,11
        shrd    r14d,r14d,9
        xor     r12d,r9d
        shrd    r13d,r13d,5
        vpxor   xmm4,xmm4,xmm6
        xor     r14d,r11d
        and     r12d,edx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((144-128))+rdi]
        xor     r13d,edx
        vpsrld  xmm6,xmm7,10
        add     r10d,DWORD[36+rsp]
        mov     esi,r11d
        shrd    r14d,r14d,11
        vpxor   xmm4,xmm4,xmm5
        xor     r12d,r9d
        xor     esi,eax
        shrd    r13d,r13d,6
        vpsrlq  xmm7,xmm7,17
        add     r10d,r12d
        and     r15d,esi
        xor     r14d,r11d
        vpaddd  xmm2,xmm2,xmm4
        add     r10d,r13d
        xor     r15d,eax
        add     ecx,r10d
        vpxor   xmm6,xmm6,xmm7
        shrd    r14d,r14d,2
        add     r10d,r15d
        mov     r13d,ecx
        vpsrlq  xmm7,xmm7,2
        add     r14d,r10d
        shrd    r13d,r13d,14
        mov     r10d,r14d
        vpxor   xmm6,xmm6,xmm7
        mov     r12d,edx
        xor     r13d,ecx
        shrd    r14d,r14d,9
        vpshufd xmm6,xmm6,132
        xor     r12d,r8d
        shrd    r13d,r13d,5
        xor     r14d,r10d
        vpsrldq xmm6,xmm6,8
        and     r12d,ecx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((160-128))+rdi]
        xor     r13d,ecx
        add     r9d,DWORD[40+rsp]
        vpaddd  xmm2,xmm2,xmm6
        mov     r15d,r10d
        shrd    r14d,r14d,11
        xor     r12d,r8d
        vpshufd xmm7,xmm2,80
        xor     r15d,r11d
        shrd    r13d,r13d,6
        add     r9d,r12d
        vpsrld  xmm6,xmm7,10
        and     esi,r15d
        xor     r14d,r10d
        add     r9d,r13d
        vpsrlq  xmm7,xmm7,17
        xor     esi,r11d
        add     ebx,r9d
        shrd    r14d,r14d,2
        vpxor   xmm6,xmm6,xmm7
        add     r9d,esi
        mov     r13d,ebx
        add     r14d,r9d
        vpsrlq  xmm7,xmm7,2
        shrd    r13d,r13d,14
        mov     r9d,r14d
        mov     r12d,ecx
        vpxor   xmm6,xmm6,xmm7
        xor     r13d,ebx
        shrd    r14d,r14d,9
        xor     r12d,edx
        vpshufd xmm6,xmm6,232
        shrd    r13d,r13d,5
        xor     r14d,r9d
        and     r12d,ebx
        vpslldq xmm6,xmm6,8
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((176-128))+rdi]
        xor     r13d,ebx
        add     r8d,DWORD[44+rsp]
        mov     esi,r9d
        vpaddd  xmm2,xmm2,xmm6
        shrd    r14d,r14d,11
        xor     r12d,edx
        xor     esi,r10d
        vpaddd  xmm6,xmm2,XMMWORD[64+rbp]
        shrd    r13d,r13d,6
        add     r8d,r12d
        and     r15d,esi
        xor     r14d,r9d
        add     r8d,r13d
        xor     r15d,r10d
        add     eax,r8d
        shrd    r14d,r14d,2
        add     r8d,r15d
        mov     r13d,eax
        add     r14d,r8d
        vmovdqa XMMWORD[32+rsp],xmm6
        vpalignr        xmm4,xmm0,xmm3,4
        shrd    r13d,r13d,14
        mov     r8d,r14d
        mov     r12d,ebx
        vpalignr        xmm7,xmm2,xmm1,4
        xor     r13d,eax
        shrd    r14d,r14d,9
        xor     r12d,ecx
        vpsrld  xmm6,xmm4,7
        shrd    r13d,r13d,5
        xor     r14d,r8d
        and     r12d,eax
        vpaddd  xmm3,xmm3,xmm7
        vpand   xmm8,xmm11,xmm12
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((192-128))+rdi]
        xor     r13d,eax
        add     edx,DWORD[48+rsp]
        mov     r15d,r8d
        vpsrld  xmm7,xmm4,3
        shrd    r14d,r14d,11
        xor     r12d,ecx
        xor     r15d,r9d
        vpslld  xmm5,xmm4,14
        shrd    r13d,r13d,6
        add     edx,r12d
        and     esi,r15d
        vpxor   xmm4,xmm7,xmm6
        xor     r14d,r8d
        add     edx,r13d
        xor     esi,r9d
        vpshufd xmm7,xmm2,250
        add     r11d,edx
        shrd    r14d,r14d,2
        add     edx,esi
        vpsrld  xmm6,xmm6,11
        mov     r13d,r11d
        add     r14d,edx
        shrd    r13d,r13d,14
        vpxor   xmm4,xmm4,xmm5
        mov     edx,r14d
        mov     r12d,eax
        xor     r13d,r11d
        vpslld  xmm5,xmm5,11
        shrd    r14d,r14d,9
        xor     r12d,ebx
        shrd    r13d,r13d,5
        vpxor   xmm4,xmm4,xmm6
        xor     r14d,edx
        and     r12d,r11d
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((208-128))+rdi]
        xor     r13d,r11d
        vpsrld  xmm6,xmm7,10
        add     ecx,DWORD[52+rsp]
        mov     esi,edx
        shrd    r14d,r14d,11
        vpxor   xmm4,xmm4,xmm5
        xor     r12d,ebx
        xor     esi,r8d
        shrd    r13d,r13d,6
        vpsrlq  xmm7,xmm7,17
        add     ecx,r12d
        and     r15d,esi
        xor     r14d,edx
        vpaddd  xmm3,xmm3,xmm4
        add     ecx,r13d
        xor     r15d,r8d
        add     r10d,ecx
        vpxor   xmm6,xmm6,xmm7
        shrd    r14d,r14d,2
        add     ecx,r15d
        mov     r13d,r10d
        vpsrlq  xmm7,xmm7,2
        add     r14d,ecx
        shrd    r13d,r13d,14
        mov     ecx,r14d
        vpxor   xmm6,xmm6,xmm7
        mov     r12d,r11d
        xor     r13d,r10d
        shrd    r14d,r14d,9
        vpshufd xmm6,xmm6,132
        xor     r12d,eax
        shrd    r13d,r13d,5
        xor     r14d,ecx
        vpsrldq xmm6,xmm6,8
        and     r12d,r10d
        vpand   xmm11,xmm11,xmm13
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((224-128))+rdi]
        xor     r13d,r10d
        add     ebx,DWORD[56+rsp]
        vpaddd  xmm3,xmm3,xmm6
        mov     r15d,ecx
        shrd    r14d,r14d,11
        xor     r12d,eax
        vpshufd xmm7,xmm3,80
        xor     r15d,edx
        shrd    r13d,r13d,6
        add     ebx,r12d
        vpsrld  xmm6,xmm7,10
        and     esi,r15d
        xor     r14d,ecx
        add     ebx,r13d
        vpsrlq  xmm7,xmm7,17
        xor     esi,edx
        add     r9d,ebx
        shrd    r14d,r14d,2
        vpxor   xmm6,xmm6,xmm7
        add     ebx,esi
        mov     r13d,r9d
        add     r14d,ebx
        vpsrlq  xmm7,xmm7,2
        shrd    r13d,r13d,14
        mov     ebx,r14d
        mov     r12d,r10d
        vpxor   xmm6,xmm6,xmm7
        xor     r13d,r9d
        shrd    r14d,r14d,9
        xor     r12d,r11d
        vpshufd xmm6,xmm6,232
        shrd    r13d,r13d,5
        xor     r14d,ebx
        and     r12d,r9d
        vpslldq xmm6,xmm6,8
        vpor    xmm8,xmm8,xmm11
        vaesenclast     xmm11,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((0-128))+rdi]
        xor     r13d,r9d
        add     eax,DWORD[60+rsp]
        mov     esi,ebx
        vpaddd  xmm3,xmm3,xmm6
        shrd    r14d,r14d,11
        xor     r12d,r11d
        xor     esi,ecx
        vpaddd  xmm6,xmm3,XMMWORD[96+rbp]
        shrd    r13d,r13d,6
        add     eax,r12d
        and     r15d,esi
        xor     r14d,ebx
        add     eax,r13d
        xor     r15d,ecx
        add     r8d,eax
        shrd    r14d,r14d,2
        add     eax,r15d
        mov     r13d,r8d
        add     r14d,eax
        vmovdqa XMMWORD[48+rsp],xmm6
        mov     r12,QWORD[((64+0))+rsp]
        vpand   xmm11,xmm11,xmm14
        mov     r15,QWORD[((64+8))+rsp]
        vpor    xmm8,xmm8,xmm11
        vmovdqu XMMWORD[r12*1+r15],xmm8
        lea     r12,[16+r12]
        cmp     BYTE[131+rbp],0
        jne     NEAR $L$avx_00_47
        vmovdqu xmm9,XMMWORD[r12]
        mov     QWORD[((64+0))+rsp],r12
        shrd    r13d,r13d,14
        mov     eax,r14d
        mov     r12d,r9d
        xor     r13d,r8d
        shrd    r14d,r14d,9
        xor     r12d,r10d
        shrd    r13d,r13d,5
        xor     r14d,eax
        and     r12d,r8d
        vpxor   xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((16-128))+rdi]
        xor     r13d,r8d
        add     r11d,DWORD[rsp]
        mov     r15d,eax
        shrd    r14d,r14d,11
        xor     r12d,r10d
        xor     r15d,ebx
        shrd    r13d,r13d,6
        add     r11d,r12d
        and     esi,r15d
        xor     r14d,eax
        add     r11d,r13d
        xor     esi,ebx
        add     edx,r11d
        shrd    r14d,r14d,2
        add     r11d,esi
        mov     r13d,edx
        add     r14d,r11d
        shrd    r13d,r13d,14
        mov     r11d,r14d
        mov     r12d,r8d
        xor     r13d,edx
        shrd    r14d,r14d,9
        xor     r12d,r9d
        shrd    r13d,r13d,5
        xor     r14d,r11d
        and     r12d,edx
        vpxor   xmm9,xmm9,xmm8
        xor     r13d,edx
        add     r10d,DWORD[4+rsp]
        mov     esi,r11d
        shrd    r14d,r14d,11
        xor     r12d,r9d
        xor     esi,eax
        shrd    r13d,r13d,6
        add     r10d,r12d
        and     r15d,esi
        xor     r14d,r11d
        add     r10d,r13d
        xor     r15d,eax
        add     ecx,r10d
        shrd    r14d,r14d,2
        add     r10d,r15d
        mov     r13d,ecx
        add     r14d,r10d
        shrd    r13d,r13d,14
        mov     r10d,r14d
        mov     r12d,edx
        xor     r13d,ecx
        shrd    r14d,r14d,9
        xor     r12d,r8d
        shrd    r13d,r13d,5
        xor     r14d,r10d
        and     r12d,ecx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((32-128))+rdi]
        xor     r13d,ecx
        add     r9d,DWORD[8+rsp]
        mov     r15d,r10d
        shrd    r14d,r14d,11
        xor     r12d,r8d
        xor     r15d,r11d
        shrd    r13d,r13d,6
        add     r9d,r12d
        and     esi,r15d
        xor     r14d,r10d
        add     r9d,r13d
        xor     esi,r11d
        add     ebx,r9d
        shrd    r14d,r14d,2
        add     r9d,esi
        mov     r13d,ebx
        add     r14d,r9d
        shrd    r13d,r13d,14
        mov     r9d,r14d
        mov     r12d,ecx
        xor     r13d,ebx
        shrd    r14d,r14d,9
        xor     r12d,edx
        shrd    r13d,r13d,5
        xor     r14d,r9d
        and     r12d,ebx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((48-128))+rdi]
        xor     r13d,ebx
        add     r8d,DWORD[12+rsp]
        mov     esi,r9d
        shrd    r14d,r14d,11
        xor     r12d,edx
        xor     esi,r10d
        shrd    r13d,r13d,6
        add     r8d,r12d
        and     r15d,esi
        xor     r14d,r9d
        add     r8d,r13d
        xor     r15d,r10d
        add     eax,r8d
        shrd    r14d,r14d,2
        add     r8d,r15d
        mov     r13d,eax
        add     r14d,r8d
        shrd    r13d,r13d,14
        mov     r8d,r14d
        mov     r12d,ebx
        xor     r13d,eax
        shrd    r14d,r14d,9
        xor     r12d,ecx
        shrd    r13d,r13d,5
        xor     r14d,r8d
        and     r12d,eax
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((64-128))+rdi]
        xor     r13d,eax
        add     edx,DWORD[16+rsp]
        mov     r15d,r8d
        shrd    r14d,r14d,11
        xor     r12d,ecx
        xor     r15d,r9d
        shrd    r13d,r13d,6
        add     edx,r12d
        and     esi,r15d
        xor     r14d,r8d
        add     edx,r13d
        xor     esi,r9d
        add     r11d,edx
        shrd    r14d,r14d,2
        add     edx,esi
        mov     r13d,r11d
        add     r14d,edx
        shrd    r13d,r13d,14
        mov     edx,r14d
        mov     r12d,eax
        xor     r13d,r11d
        shrd    r14d,r14d,9
        xor     r12d,ebx
        shrd    r13d,r13d,5
        xor     r14d,edx
        and     r12d,r11d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((80-128))+rdi]
        xor     r13d,r11d
        add     ecx,DWORD[20+rsp]
        mov     esi,edx
        shrd    r14d,r14d,11
        xor     r12d,ebx
        xor     esi,r8d
        shrd    r13d,r13d,6
        add     ecx,r12d
        and     r15d,esi
        xor     r14d,edx
        add     ecx,r13d
        xor     r15d,r8d
        add     r10d,ecx
        shrd    r14d,r14d,2
        add     ecx,r15d
        mov     r13d,r10d
        add     r14d,ecx
        shrd    r13d,r13d,14
        mov     ecx,r14d
        mov     r12d,r11d
        xor     r13d,r10d
        shrd    r14d,r14d,9
        xor     r12d,eax
        shrd    r13d,r13d,5
        xor     r14d,ecx
        and     r12d,r10d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((96-128))+rdi]
        xor     r13d,r10d
        add     ebx,DWORD[24+rsp]
        mov     r15d,ecx
        shrd    r14d,r14d,11
        xor     r12d,eax
        xor     r15d,edx
        shrd    r13d,r13d,6
        add     ebx,r12d
        and     esi,r15d
        xor     r14d,ecx
        add     ebx,r13d
        xor     esi,edx
        add     r9d,ebx
        shrd    r14d,r14d,2
        add     ebx,esi
        mov     r13d,r9d
        add     r14d,ebx
        shrd    r13d,r13d,14
        mov     ebx,r14d
        mov     r12d,r10d
        xor     r13d,r9d
        shrd    r14d,r14d,9
        xor     r12d,r11d
        shrd    r13d,r13d,5
        xor     r14d,ebx
        and     r12d,r9d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((112-128))+rdi]
        xor     r13d,r9d
        add     eax,DWORD[28+rsp]
        mov     esi,ebx
        shrd    r14d,r14d,11
        xor     r12d,r11d
        xor     esi,ecx
        shrd    r13d,r13d,6
        add     eax,r12d
        and     r15d,esi
        xor     r14d,ebx
        add     eax,r13d
        xor     r15d,ecx
        add     r8d,eax
        shrd    r14d,r14d,2
        add     eax,r15d
        mov     r13d,r8d
        add     r14d,eax
        shrd    r13d,r13d,14
        mov     eax,r14d
        mov     r12d,r9d
        xor     r13d,r8d
        shrd    r14d,r14d,9
        xor     r12d,r10d
        shrd    r13d,r13d,5
        xor     r14d,eax
        and     r12d,r8d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((128-128))+rdi]
        xor     r13d,r8d
        add     r11d,DWORD[32+rsp]
        mov     r15d,eax
        shrd    r14d,r14d,11
        xor     r12d,r10d
        xor     r15d,ebx
        shrd    r13d,r13d,6
        add     r11d,r12d
        and     esi,r15d
        xor     r14d,eax
        add     r11d,r13d
        xor     esi,ebx
        add     edx,r11d
        shrd    r14d,r14d,2
        add     r11d,esi
        mov     r13d,edx
        add     r14d,r11d
        shrd    r13d,r13d,14
        mov     r11d,r14d
        mov     r12d,r8d
        xor     r13d,edx
        shrd    r14d,r14d,9
        xor     r12d,r9d
        shrd    r13d,r13d,5
        xor     r14d,r11d
        and     r12d,edx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((144-128))+rdi]
        xor     r13d,edx
        add     r10d,DWORD[36+rsp]
        mov     esi,r11d
        shrd    r14d,r14d,11
        xor     r12d,r9d
        xor     esi,eax
        shrd    r13d,r13d,6
        add     r10d,r12d
        and     r15d,esi
        xor     r14d,r11d
        add     r10d,r13d
        xor     r15d,eax
        add     ecx,r10d
        shrd    r14d,r14d,2
        add     r10d,r15d
        mov     r13d,ecx
        add     r14d,r10d
        shrd    r13d,r13d,14
        mov     r10d,r14d
        mov     r12d,edx
        xor     r13d,ecx
        shrd    r14d,r14d,9
        xor     r12d,r8d
        shrd    r13d,r13d,5
        xor     r14d,r10d
        and     r12d,ecx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((160-128))+rdi]
        xor     r13d,ecx
        add     r9d,DWORD[40+rsp]
        mov     r15d,r10d
        shrd    r14d,r14d,11
        xor     r12d,r8d
        xor     r15d,r11d
        shrd    r13d,r13d,6
        add     r9d,r12d
        and     esi,r15d
        xor     r14d,r10d
        add     r9d,r13d
        xor     esi,r11d
        add     ebx,r9d
        shrd    r14d,r14d,2
        add     r9d,esi
        mov     r13d,ebx
        add     r14d,r9d
        shrd    r13d,r13d,14
        mov     r9d,r14d
        mov     r12d,ecx
        xor     r13d,ebx
        shrd    r14d,r14d,9
        xor     r12d,edx
        shrd    r13d,r13d,5
        xor     r14d,r9d
        and     r12d,ebx
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((176-128))+rdi]
        xor     r13d,ebx
        add     r8d,DWORD[44+rsp]
        mov     esi,r9d
        shrd    r14d,r14d,11
        xor     r12d,edx
        xor     esi,r10d
        shrd    r13d,r13d,6
        add     r8d,r12d
        and     r15d,esi
        xor     r14d,r9d
        add     r8d,r13d
        xor     r15d,r10d
        add     eax,r8d
        shrd    r14d,r14d,2
        add     r8d,r15d
        mov     r13d,eax
        add     r14d,r8d
        shrd    r13d,r13d,14
        mov     r8d,r14d
        mov     r12d,ebx
        xor     r13d,eax
        shrd    r14d,r14d,9
        xor     r12d,ecx
        shrd    r13d,r13d,5
        xor     r14d,r8d
        and     r12d,eax
        vpand   xmm8,xmm11,xmm12
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((192-128))+rdi]
        xor     r13d,eax
        add     edx,DWORD[48+rsp]
        mov     r15d,r8d
        shrd    r14d,r14d,11
        xor     r12d,ecx
        xor     r15d,r9d
        shrd    r13d,r13d,6
        add     edx,r12d
        and     esi,r15d
        xor     r14d,r8d
        add     edx,r13d
        xor     esi,r9d
        add     r11d,edx
        shrd    r14d,r14d,2
        add     edx,esi
        mov     r13d,r11d
        add     r14d,edx
        shrd    r13d,r13d,14
        mov     edx,r14d
        mov     r12d,eax
        xor     r13d,r11d
        shrd    r14d,r14d,9
        xor     r12d,ebx
        shrd    r13d,r13d,5
        xor     r14d,edx
        and     r12d,r11d
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((208-128))+rdi]
        xor     r13d,r11d
        add     ecx,DWORD[52+rsp]
        mov     esi,edx
        shrd    r14d,r14d,11
        xor     r12d,ebx
        xor     esi,r8d
        shrd    r13d,r13d,6
        add     ecx,r12d
        and     r15d,esi
        xor     r14d,edx
        add     ecx,r13d
        xor     r15d,r8d
        add     r10d,ecx
        shrd    r14d,r14d,2
        add     ecx,r15d
        mov     r13d,r10d
        add     r14d,ecx
        shrd    r13d,r13d,14
        mov     ecx,r14d
        mov     r12d,r11d
        xor     r13d,r10d
        shrd    r14d,r14d,9
        xor     r12d,eax
        shrd    r13d,r13d,5
        xor     r14d,ecx
        and     r12d,r10d
        vpand   xmm11,xmm11,xmm13
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((224-128))+rdi]
        xor     r13d,r10d
        add     ebx,DWORD[56+rsp]
        mov     r15d,ecx
        shrd    r14d,r14d,11
        xor     r12d,eax
        xor     r15d,edx
        shrd    r13d,r13d,6
        add     ebx,r12d
        and     esi,r15d
        xor     r14d,ecx
        add     ebx,r13d
        xor     esi,edx
        add     r9d,ebx
        shrd    r14d,r14d,2
        add     ebx,esi
        mov     r13d,r9d
        add     r14d,ebx
        shrd    r13d,r13d,14
        mov     ebx,r14d
        mov     r12d,r10d
        xor     r13d,r9d
        shrd    r14d,r14d,9
        xor     r12d,r11d
        shrd    r13d,r13d,5
        xor     r14d,ebx
        and     r12d,r9d
        vpor    xmm8,xmm8,xmm11
        vaesenclast     xmm11,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((0-128))+rdi]
        xor     r13d,r9d
        add     eax,DWORD[60+rsp]
        mov     esi,ebx
        shrd    r14d,r14d,11
        xor     r12d,r11d
        xor     esi,ecx
        shrd    r13d,r13d,6
        add     eax,r12d
        and     r15d,esi
        xor     r14d,ebx
        add     eax,r13d
        xor     r15d,ecx
        add     r8d,eax
        shrd    r14d,r14d,2
        add     eax,r15d
        mov     r13d,r8d
        add     r14d,eax
        mov     r12,QWORD[((64+0))+rsp]
        mov     r13,QWORD[((64+8))+rsp]
        mov     r15,QWORD[((64+40))+rsp]
        mov     rsi,QWORD[((64+48))+rsp]

        vpand   xmm11,xmm11,xmm14
        mov     eax,r14d
        vpor    xmm8,xmm8,xmm11
        vmovdqu XMMWORD[r13*1+r12],xmm8
        lea     r12,[16+r12]

        add     eax,DWORD[r15]
        add     ebx,DWORD[4+r15]
        add     ecx,DWORD[8+r15]
        add     edx,DWORD[12+r15]
        add     r8d,DWORD[16+r15]
        add     r9d,DWORD[20+r15]
        add     r10d,DWORD[24+r15]
        add     r11d,DWORD[28+r15]

        cmp     r12,QWORD[((64+16))+rsp]

        mov     DWORD[r15],eax
        mov     DWORD[4+r15],ebx
        mov     DWORD[8+r15],ecx
        mov     DWORD[12+r15],edx
        mov     DWORD[16+r15],r8d
        mov     DWORD[20+r15],r9d
        mov     DWORD[24+r15],r10d
        mov     DWORD[28+r15],r11d
        jb      NEAR $L$loop_avx

        mov     r8,QWORD[((64+32))+rsp]
        mov     rsi,QWORD[120+rsp]

        vmovdqu XMMWORD[r8],xmm8
        vzeroall
        movaps  xmm6,XMMWORD[128+rsp]
        movaps  xmm7,XMMWORD[144+rsp]
        movaps  xmm8,XMMWORD[160+rsp]
        movaps  xmm9,XMMWORD[176+rsp]
        movaps  xmm10,XMMWORD[192+rsp]
        movaps  xmm11,XMMWORD[208+rsp]
        movaps  xmm12,XMMWORD[224+rsp]
        movaps  xmm13,XMMWORD[240+rsp]
        movaps  xmm14,XMMWORD[256+rsp]
        movaps  xmm15,XMMWORD[272+rsp]
        mov     r15,QWORD[((-48))+rsi]

        mov     r14,QWORD[((-40))+rsi]

        mov     r13,QWORD[((-32))+rsi]

        mov     r12,QWORD[((-24))+rsi]

        mov     rbp,QWORD[((-16))+rsi]

        mov     rbx,QWORD[((-8))+rsi]

        lea     rsp,[rsi]

$L$epilogue_avx:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret

$L$SEH_end_aesni_cbc_sha256_enc_avx:

ALIGN   64
aesni_cbc_sha256_enc_avx2:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_aesni_cbc_sha256_enc_avx2:
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8
        mov     rcx,r9
        mov     r8,QWORD[40+rsp]
        mov     r9,QWORD[48+rsp]



$L$avx2_shortcut:
        mov     r10,QWORD[56+rsp]
        mov     rax,rsp

        push    rbx

        push    rbp

        push    r12

        push    r13

        push    r14

        push    r15

        sub     rsp,736
        and     rsp,-256*4
        add     rsp,448

        shl     rdx,6
        sub     rsi,rdi
        sub     r10,rdi
        add     rdx,rdi



        mov     QWORD[((64+16))+rsp],rdx

        mov     QWORD[((64+32))+rsp],r8
        mov     QWORD[((64+40))+rsp],r9
        mov     QWORD[((64+48))+rsp],r10
        mov     QWORD[120+rsp],rax

        movaps  XMMWORD[128+rsp],xmm6
        movaps  XMMWORD[144+rsp],xmm7
        movaps  XMMWORD[160+rsp],xmm8
        movaps  XMMWORD[176+rsp],xmm9
        movaps  XMMWORD[192+rsp],xmm10
        movaps  XMMWORD[208+rsp],xmm11
        movaps  XMMWORD[224+rsp],xmm12
        movaps  XMMWORD[240+rsp],xmm13
        movaps  XMMWORD[256+rsp],xmm14
        movaps  XMMWORD[272+rsp],xmm15
$L$prologue_avx2:
        vzeroall

        mov     r13,rdi
        vpinsrq xmm15,xmm15,rsi,1
        lea     rdi,[128+rcx]
        lea     r12,[((K256+544))]
        mov     r14d,DWORD[((240-128))+rdi]
        mov     r15,r9
        mov     rsi,r10
        vmovdqu xmm8,XMMWORD[r8]
        lea     r14,[((-9))+r14]

        vmovdqa xmm14,XMMWORD[r14*8+r12]
        vmovdqa xmm13,XMMWORD[16+r14*8+r12]
        vmovdqa xmm12,XMMWORD[32+r14*8+r12]

        sub     r13,-16*4
        mov     eax,DWORD[r15]
        lea     r12,[r13*1+rsi]
        mov     ebx,DWORD[4+r15]
        cmp     r13,rdx
        mov     ecx,DWORD[8+r15]
        cmove   r12,rsp
        mov     edx,DWORD[12+r15]
        mov     r8d,DWORD[16+r15]
        mov     r9d,DWORD[20+r15]
        mov     r10d,DWORD[24+r15]
        mov     r11d,DWORD[28+r15]
        vmovdqu xmm10,XMMWORD[((0-128))+rdi]
        jmp     NEAR $L$oop_avx2
ALIGN   16
$L$oop_avx2:
        vmovdqa ymm7,YMMWORD[((K256+512))]
        vmovdqu xmm0,XMMWORD[((-64+0))+r13*1+rsi]
        vmovdqu xmm1,XMMWORD[((-64+16))+r13*1+rsi]
        vmovdqu xmm2,XMMWORD[((-64+32))+r13*1+rsi]
        vmovdqu xmm3,XMMWORD[((-64+48))+r13*1+rsi]

        vinserti128     ymm0,ymm0,XMMWORD[r12],1
        vinserti128     ymm1,ymm1,XMMWORD[16+r12],1
        vpshufb ymm0,ymm0,ymm7
        vinserti128     ymm2,ymm2,XMMWORD[32+r12],1
        vpshufb ymm1,ymm1,ymm7
        vinserti128     ymm3,ymm3,XMMWORD[48+r12],1

        lea     rbp,[K256]
        vpshufb ymm2,ymm2,ymm7
        lea     r13,[((-64))+r13]
        vpaddd  ymm4,ymm0,YMMWORD[rbp]
        vpshufb ymm3,ymm3,ymm7
        vpaddd  ymm5,ymm1,YMMWORD[32+rbp]
        vpaddd  ymm6,ymm2,YMMWORD[64+rbp]
        vpaddd  ymm7,ymm3,YMMWORD[96+rbp]
        vmovdqa YMMWORD[rsp],ymm4
        xor     r14d,r14d
        vmovdqa YMMWORD[32+rsp],ymm5
        lea     rsp,[((-64))+rsp]
        mov     esi,ebx
        vmovdqa YMMWORD[rsp],ymm6
        xor     esi,ecx
        vmovdqa YMMWORD[32+rsp],ymm7
        mov     r12d,r9d
        sub     rbp,-16*2*4
        jmp     NEAR $L$avx2_00_47

ALIGN   16
$L$avx2_00_47:
        vmovdqu xmm9,XMMWORD[r13]
        vpinsrq xmm15,xmm15,r13,0
        lea     rsp,[((-64))+rsp]
        vpalignr        ymm4,ymm1,ymm0,4
        add     r11d,DWORD[((0+128))+rsp]
        and     r12d,r8d
        rorx    r13d,r8d,25
        vpalignr        ymm7,ymm3,ymm2,4
        rorx    r15d,r8d,11
        lea     eax,[r14*1+rax]
        lea     r11d,[r12*1+r11]
        vpsrld  ymm6,ymm4,7
        andn    r12d,r8d,r10d
        xor     r13d,r15d
        rorx    r14d,r8d,6
        vpaddd  ymm0,ymm0,ymm7
        lea     r11d,[r12*1+r11]
        xor     r13d,r14d
        mov     r15d,eax
        vpsrld  ymm7,ymm4,3
        rorx    r12d,eax,22
        lea     r11d,[r13*1+r11]
        xor     r15d,ebx
        vpslld  ymm5,ymm4,14
        rorx    r14d,eax,13
        rorx    r13d,eax,2
        lea     edx,[r11*1+rdx]
        vpxor   ymm4,ymm7,ymm6
        and     esi,r15d
        vpxor   xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((16-128))+rdi]
        xor     r14d,r12d
        xor     esi,ebx
        vpshufd ymm7,ymm3,250
        xor     r14d,r13d
        lea     r11d,[rsi*1+r11]
        mov     r12d,r8d
        vpsrld  ymm6,ymm6,11
        add     r10d,DWORD[((4+128))+rsp]
        and     r12d,edx
        rorx    r13d,edx,25
        vpxor   ymm4,ymm4,ymm5
        rorx    esi,edx,11
        lea     r11d,[r14*1+r11]
        lea     r10d,[r12*1+r10]
        vpslld  ymm5,ymm5,11
        andn    r12d,edx,r9d
        xor     r13d,esi
        rorx    r14d,edx,6
        vpxor   ymm4,ymm4,ymm6
        lea     r10d,[r12*1+r10]
        xor     r13d,r14d
        mov     esi,r11d
        vpsrld  ymm6,ymm7,10
        rorx    r12d,r11d,22
        lea     r10d,[r13*1+r10]
        xor     esi,eax
        vpxor   ymm4,ymm4,ymm5
        rorx    r14d,r11d,13
        rorx    r13d,r11d,2
        lea     ecx,[r10*1+rcx]
        vpsrlq  ymm7,ymm7,17
        and     r15d,esi
        vpxor   xmm9,xmm9,xmm8
        xor     r14d,r12d
        xor     r15d,eax
        vpaddd  ymm0,ymm0,ymm4
        xor     r14d,r13d
        lea     r10d,[r15*1+r10]
        mov     r12d,edx
        vpxor   ymm6,ymm6,ymm7
        add     r9d,DWORD[((8+128))+rsp]
        and     r12d,ecx
        rorx    r13d,ecx,25
        vpsrlq  ymm7,ymm7,2
        rorx    r15d,ecx,11
        lea     r10d,[r14*1+r10]
        lea     r9d,[r12*1+r9]
        vpxor   ymm6,ymm6,ymm7
        andn    r12d,ecx,r8d
        xor     r13d,r15d
        rorx    r14d,ecx,6
        vpshufd ymm6,ymm6,132
        lea     r9d,[r12*1+r9]
        xor     r13d,r14d
        mov     r15d,r10d
        vpsrldq ymm6,ymm6,8
        rorx    r12d,r10d,22
        lea     r9d,[r13*1+r9]
        xor     r15d,r11d
        vpaddd  ymm0,ymm0,ymm6
        rorx    r14d,r10d,13
        rorx    r13d,r10d,2
        lea     ebx,[r9*1+rbx]
        vpshufd ymm7,ymm0,80
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((32-128))+rdi]
        xor     r14d,r12d
        xor     esi,r11d
        vpsrld  ymm6,ymm7,10
        xor     r14d,r13d
        lea     r9d,[rsi*1+r9]
        mov     r12d,ecx
        vpsrlq  ymm7,ymm7,17
        add     r8d,DWORD[((12+128))+rsp]
        and     r12d,ebx
        rorx    r13d,ebx,25
        vpxor   ymm6,ymm6,ymm7
        rorx    esi,ebx,11
        lea     r9d,[r14*1+r9]
        lea     r8d,[r12*1+r8]
        vpsrlq  ymm7,ymm7,2
        andn    r12d,ebx,edx
        xor     r13d,esi
        rorx    r14d,ebx,6
        vpxor   ymm6,ymm6,ymm7
        lea     r8d,[r12*1+r8]
        xor     r13d,r14d
        mov     esi,r9d
        vpshufd ymm6,ymm6,232
        rorx    r12d,r9d,22
        lea     r8d,[r13*1+r8]
        xor     esi,r10d
        vpslldq ymm6,ymm6,8
        rorx    r14d,r9d,13
        rorx    r13d,r9d,2
        lea     eax,[r8*1+rax]
        vpaddd  ymm0,ymm0,ymm6
        and     r15d,esi
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((48-128))+rdi]
        xor     r14d,r12d
        xor     r15d,r10d
        vpaddd  ymm6,ymm0,YMMWORD[rbp]
        xor     r14d,r13d
        lea     r8d,[r15*1+r8]
        mov     r12d,ebx
        vmovdqa YMMWORD[rsp],ymm6
        vpalignr        ymm4,ymm2,ymm1,4
        add     edx,DWORD[((32+128))+rsp]
        and     r12d,eax
        rorx    r13d,eax,25
        vpalignr        ymm7,ymm0,ymm3,4
        rorx    r15d,eax,11
        lea     r8d,[r14*1+r8]
        lea     edx,[r12*1+rdx]
        vpsrld  ymm6,ymm4,7
        andn    r12d,eax,ecx
        xor     r13d,r15d
        rorx    r14d,eax,6
        vpaddd  ymm1,ymm1,ymm7
        lea     edx,[r12*1+rdx]
        xor     r13d,r14d
        mov     r15d,r8d
        vpsrld  ymm7,ymm4,3
        rorx    r12d,r8d,22
        lea     edx,[r13*1+rdx]
        xor     r15d,r9d
        vpslld  ymm5,ymm4,14
        rorx    r14d,r8d,13
        rorx    r13d,r8d,2
        lea     r11d,[rdx*1+r11]
        vpxor   ymm4,ymm7,ymm6
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((64-128))+rdi]
        xor     r14d,r12d
        xor     esi,r9d
        vpshufd ymm7,ymm0,250
        xor     r14d,r13d
        lea     edx,[rsi*1+rdx]
        mov     r12d,eax
        vpsrld  ymm6,ymm6,11
        add     ecx,DWORD[((36+128))+rsp]
        and     r12d,r11d
        rorx    r13d,r11d,25
        vpxor   ymm4,ymm4,ymm5
        rorx    esi,r11d,11
        lea     edx,[r14*1+rdx]
        lea     ecx,[r12*1+rcx]
        vpslld  ymm5,ymm5,11
        andn    r12d,r11d,ebx
        xor     r13d,esi
        rorx    r14d,r11d,6
        vpxor   ymm4,ymm4,ymm6
        lea     ecx,[r12*1+rcx]
        xor     r13d,r14d
        mov     esi,edx
        vpsrld  ymm6,ymm7,10
        rorx    r12d,edx,22
        lea     ecx,[r13*1+rcx]
        xor     esi,r8d
        vpxor   ymm4,ymm4,ymm5
        rorx    r14d,edx,13
        rorx    r13d,edx,2
        lea     r10d,[rcx*1+r10]
        vpsrlq  ymm7,ymm7,17
        and     r15d,esi
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((80-128))+rdi]
        xor     r14d,r12d
        xor     r15d,r8d
        vpaddd  ymm1,ymm1,ymm4
        xor     r14d,r13d
        lea     ecx,[r15*1+rcx]
        mov     r12d,r11d
        vpxor   ymm6,ymm6,ymm7
        add     ebx,DWORD[((40+128))+rsp]
        and     r12d,r10d
        rorx    r13d,r10d,25
        vpsrlq  ymm7,ymm7,2
        rorx    r15d,r10d,11
        lea     ecx,[r14*1+rcx]
        lea     ebx,[r12*1+rbx]
        vpxor   ymm6,ymm6,ymm7
        andn    r12d,r10d,eax
        xor     r13d,r15d
        rorx    r14d,r10d,6
        vpshufd ymm6,ymm6,132
        lea     ebx,[r12*1+rbx]
        xor     r13d,r14d
        mov     r15d,ecx
        vpsrldq ymm6,ymm6,8
        rorx    r12d,ecx,22
        lea     ebx,[r13*1+rbx]
        xor     r15d,edx
        vpaddd  ymm1,ymm1,ymm6
        rorx    r14d,ecx,13
        rorx    r13d,ecx,2
        lea     r9d,[rbx*1+r9]
        vpshufd ymm7,ymm1,80
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((96-128))+rdi]
        xor     r14d,r12d
        xor     esi,edx
        vpsrld  ymm6,ymm7,10
        xor     r14d,r13d
        lea     ebx,[rsi*1+rbx]
        mov     r12d,r10d
        vpsrlq  ymm7,ymm7,17
        add     eax,DWORD[((44+128))+rsp]
        and     r12d,r9d
        rorx    r13d,r9d,25
        vpxor   ymm6,ymm6,ymm7
        rorx    esi,r9d,11
        lea     ebx,[r14*1+rbx]
        lea     eax,[r12*1+rax]
        vpsrlq  ymm7,ymm7,2
        andn    r12d,r9d,r11d
        xor     r13d,esi
        rorx    r14d,r9d,6
        vpxor   ymm6,ymm6,ymm7
        lea     eax,[r12*1+rax]
        xor     r13d,r14d
        mov     esi,ebx
        vpshufd ymm6,ymm6,232
        rorx    r12d,ebx,22
        lea     eax,[r13*1+rax]
        xor     esi,ecx
        vpslldq ymm6,ymm6,8
        rorx    r14d,ebx,13
        rorx    r13d,ebx,2
        lea     r8d,[rax*1+r8]
        vpaddd  ymm1,ymm1,ymm6
        and     r15d,esi
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((112-128))+rdi]
        xor     r14d,r12d
        xor     r15d,ecx
        vpaddd  ymm6,ymm1,YMMWORD[32+rbp]
        xor     r14d,r13d
        lea     eax,[r15*1+rax]
        mov     r12d,r9d
        vmovdqa YMMWORD[32+rsp],ymm6
        lea     rsp,[((-64))+rsp]
        vpalignr        ymm4,ymm3,ymm2,4
        add     r11d,DWORD[((0+128))+rsp]
        and     r12d,r8d
        rorx    r13d,r8d,25
        vpalignr        ymm7,ymm1,ymm0,4
        rorx    r15d,r8d,11
        lea     eax,[r14*1+rax]
        lea     r11d,[r12*1+r11]
        vpsrld  ymm6,ymm4,7
        andn    r12d,r8d,r10d
        xor     r13d,r15d
        rorx    r14d,r8d,6
        vpaddd  ymm2,ymm2,ymm7
        lea     r11d,[r12*1+r11]
        xor     r13d,r14d
        mov     r15d,eax
        vpsrld  ymm7,ymm4,3
        rorx    r12d,eax,22
        lea     r11d,[r13*1+r11]
        xor     r15d,ebx
        vpslld  ymm5,ymm4,14
        rorx    r14d,eax,13
        rorx    r13d,eax,2
        lea     edx,[r11*1+rdx]
        vpxor   ymm4,ymm7,ymm6
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((128-128))+rdi]
        xor     r14d,r12d
        xor     esi,ebx
        vpshufd ymm7,ymm1,250
        xor     r14d,r13d
        lea     r11d,[rsi*1+r11]
        mov     r12d,r8d
        vpsrld  ymm6,ymm6,11
        add     r10d,DWORD[((4+128))+rsp]
        and     r12d,edx
        rorx    r13d,edx,25
        vpxor   ymm4,ymm4,ymm5
        rorx    esi,edx,11
        lea     r11d,[r14*1+r11]
        lea     r10d,[r12*1+r10]
        vpslld  ymm5,ymm5,11
        andn    r12d,edx,r9d
        xor     r13d,esi
        rorx    r14d,edx,6
        vpxor   ymm4,ymm4,ymm6
        lea     r10d,[r12*1+r10]
        xor     r13d,r14d
        mov     esi,r11d
        vpsrld  ymm6,ymm7,10
        rorx    r12d,r11d,22
        lea     r10d,[r13*1+r10]
        xor     esi,eax
        vpxor   ymm4,ymm4,ymm5
        rorx    r14d,r11d,13
        rorx    r13d,r11d,2
        lea     ecx,[r10*1+rcx]
        vpsrlq  ymm7,ymm7,17
        and     r15d,esi
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((144-128))+rdi]
        xor     r14d,r12d
        xor     r15d,eax
        vpaddd  ymm2,ymm2,ymm4
        xor     r14d,r13d
        lea     r10d,[r15*1+r10]
        mov     r12d,edx
        vpxor   ymm6,ymm6,ymm7
        add     r9d,DWORD[((8+128))+rsp]
        and     r12d,ecx
        rorx    r13d,ecx,25
        vpsrlq  ymm7,ymm7,2
        rorx    r15d,ecx,11
        lea     r10d,[r14*1+r10]
        lea     r9d,[r12*1+r9]
        vpxor   ymm6,ymm6,ymm7
        andn    r12d,ecx,r8d
        xor     r13d,r15d
        rorx    r14d,ecx,6
        vpshufd ymm6,ymm6,132
        lea     r9d,[r12*1+r9]
        xor     r13d,r14d
        mov     r15d,r10d
        vpsrldq ymm6,ymm6,8
        rorx    r12d,r10d,22
        lea     r9d,[r13*1+r9]
        xor     r15d,r11d
        vpaddd  ymm2,ymm2,ymm6
        rorx    r14d,r10d,13
        rorx    r13d,r10d,2
        lea     ebx,[r9*1+rbx]
        vpshufd ymm7,ymm2,80
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((160-128))+rdi]
        xor     r14d,r12d
        xor     esi,r11d
        vpsrld  ymm6,ymm7,10
        xor     r14d,r13d
        lea     r9d,[rsi*1+r9]
        mov     r12d,ecx
        vpsrlq  ymm7,ymm7,17
        add     r8d,DWORD[((12+128))+rsp]
        and     r12d,ebx
        rorx    r13d,ebx,25
        vpxor   ymm6,ymm6,ymm7
        rorx    esi,ebx,11
        lea     r9d,[r14*1+r9]
        lea     r8d,[r12*1+r8]
        vpsrlq  ymm7,ymm7,2
        andn    r12d,ebx,edx
        xor     r13d,esi
        rorx    r14d,ebx,6
        vpxor   ymm6,ymm6,ymm7
        lea     r8d,[r12*1+r8]
        xor     r13d,r14d
        mov     esi,r9d
        vpshufd ymm6,ymm6,232
        rorx    r12d,r9d,22
        lea     r8d,[r13*1+r8]
        xor     esi,r10d
        vpslldq ymm6,ymm6,8
        rorx    r14d,r9d,13
        rorx    r13d,r9d,2
        lea     eax,[r8*1+rax]
        vpaddd  ymm2,ymm2,ymm6
        and     r15d,esi
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((176-128))+rdi]
        xor     r14d,r12d
        xor     r15d,r10d
        vpaddd  ymm6,ymm2,YMMWORD[64+rbp]
        xor     r14d,r13d
        lea     r8d,[r15*1+r8]
        mov     r12d,ebx
        vmovdqa YMMWORD[rsp],ymm6
        vpalignr        ymm4,ymm0,ymm3,4
        add     edx,DWORD[((32+128))+rsp]
        and     r12d,eax
        rorx    r13d,eax,25
        vpalignr        ymm7,ymm2,ymm1,4
        rorx    r15d,eax,11
        lea     r8d,[r14*1+r8]
        lea     edx,[r12*1+rdx]
        vpsrld  ymm6,ymm4,7
        andn    r12d,eax,ecx
        xor     r13d,r15d
        rorx    r14d,eax,6
        vpaddd  ymm3,ymm3,ymm7
        lea     edx,[r12*1+rdx]
        xor     r13d,r14d
        mov     r15d,r8d
        vpsrld  ymm7,ymm4,3
        rorx    r12d,r8d,22
        lea     edx,[r13*1+rdx]
        xor     r15d,r9d
        vpslld  ymm5,ymm4,14
        rorx    r14d,r8d,13
        rorx    r13d,r8d,2
        lea     r11d,[rdx*1+r11]
        vpxor   ymm4,ymm7,ymm6
        and     esi,r15d
        vpand   xmm8,xmm11,xmm12
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((192-128))+rdi]
        xor     r14d,r12d
        xor     esi,r9d
        vpshufd ymm7,ymm2,250
        xor     r14d,r13d
        lea     edx,[rsi*1+rdx]
        mov     r12d,eax
        vpsrld  ymm6,ymm6,11
        add     ecx,DWORD[((36+128))+rsp]
        and     r12d,r11d
        rorx    r13d,r11d,25
        vpxor   ymm4,ymm4,ymm5
        rorx    esi,r11d,11
        lea     edx,[r14*1+rdx]
        lea     ecx,[r12*1+rcx]
        vpslld  ymm5,ymm5,11
        andn    r12d,r11d,ebx
        xor     r13d,esi
        rorx    r14d,r11d,6
        vpxor   ymm4,ymm4,ymm6
        lea     ecx,[r12*1+rcx]
        xor     r13d,r14d
        mov     esi,edx
        vpsrld  ymm6,ymm7,10
        rorx    r12d,edx,22
        lea     ecx,[r13*1+rcx]
        xor     esi,r8d
        vpxor   ymm4,ymm4,ymm5
        rorx    r14d,edx,13
        rorx    r13d,edx,2
        lea     r10d,[rcx*1+r10]
        vpsrlq  ymm7,ymm7,17
        and     r15d,esi
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((208-128))+rdi]
        xor     r14d,r12d
        xor     r15d,r8d
        vpaddd  ymm3,ymm3,ymm4
        xor     r14d,r13d
        lea     ecx,[r15*1+rcx]
        mov     r12d,r11d
        vpxor   ymm6,ymm6,ymm7
        add     ebx,DWORD[((40+128))+rsp]
        and     r12d,r10d
        rorx    r13d,r10d,25
        vpsrlq  ymm7,ymm7,2
        rorx    r15d,r10d,11
        lea     ecx,[r14*1+rcx]
        lea     ebx,[r12*1+rbx]
        vpxor   ymm6,ymm6,ymm7
        andn    r12d,r10d,eax
        xor     r13d,r15d
        rorx    r14d,r10d,6
        vpshufd ymm6,ymm6,132
        lea     ebx,[r12*1+rbx]
        xor     r13d,r14d
        mov     r15d,ecx
        vpsrldq ymm6,ymm6,8
        rorx    r12d,ecx,22
        lea     ebx,[r13*1+rbx]
        xor     r15d,edx
        vpaddd  ymm3,ymm3,ymm6
        rorx    r14d,ecx,13
        rorx    r13d,ecx,2
        lea     r9d,[rbx*1+r9]
        vpshufd ymm7,ymm3,80
        and     esi,r15d
        vpand   xmm11,xmm11,xmm13
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((224-128))+rdi]
        xor     r14d,r12d
        xor     esi,edx
        vpsrld  ymm6,ymm7,10
        xor     r14d,r13d
        lea     ebx,[rsi*1+rbx]
        mov     r12d,r10d
        vpsrlq  ymm7,ymm7,17
        add     eax,DWORD[((44+128))+rsp]
        and     r12d,r9d
        rorx    r13d,r9d,25
        vpxor   ymm6,ymm6,ymm7
        rorx    esi,r9d,11
        lea     ebx,[r14*1+rbx]
        lea     eax,[r12*1+rax]
        vpsrlq  ymm7,ymm7,2
        andn    r12d,r9d,r11d
        xor     r13d,esi
        rorx    r14d,r9d,6
        vpxor   ymm6,ymm6,ymm7
        lea     eax,[r12*1+rax]
        xor     r13d,r14d
        mov     esi,ebx
        vpshufd ymm6,ymm6,232
        rorx    r12d,ebx,22
        lea     eax,[r13*1+rax]
        xor     esi,ecx
        vpslldq ymm6,ymm6,8
        rorx    r14d,ebx,13
        rorx    r13d,ebx,2
        lea     r8d,[rax*1+r8]
        vpaddd  ymm3,ymm3,ymm6
        and     r15d,esi
        vpor    xmm8,xmm8,xmm11
        vaesenclast     xmm11,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((0-128))+rdi]
        xor     r14d,r12d
        xor     r15d,ecx
        vpaddd  ymm6,ymm3,YMMWORD[96+rbp]
        xor     r14d,r13d
        lea     eax,[r15*1+rax]
        mov     r12d,r9d
        vmovdqa YMMWORD[32+rsp],ymm6
        vmovq   r13,xmm15
        vpextrq r15,xmm15,1
        vpand   xmm11,xmm11,xmm14
        vpor    xmm8,xmm8,xmm11
        vmovdqu XMMWORD[r13*1+r15],xmm8
        lea     r13,[16+r13]
        lea     rbp,[128+rbp]
        cmp     BYTE[3+rbp],0
        jne     NEAR $L$avx2_00_47
        vmovdqu xmm9,XMMWORD[r13]
        vpinsrq xmm15,xmm15,r13,0
        add     r11d,DWORD[((0+64))+rsp]
        and     r12d,r8d
        rorx    r13d,r8d,25
        rorx    r15d,r8d,11
        lea     eax,[r14*1+rax]
        lea     r11d,[r12*1+r11]
        andn    r12d,r8d,r10d
        xor     r13d,r15d
        rorx    r14d,r8d,6
        lea     r11d,[r12*1+r11]
        xor     r13d,r14d
        mov     r15d,eax
        rorx    r12d,eax,22
        lea     r11d,[r13*1+r11]
        xor     r15d,ebx
        rorx    r14d,eax,13
        rorx    r13d,eax,2
        lea     edx,[r11*1+rdx]
        and     esi,r15d
        vpxor   xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((16-128))+rdi]
        xor     r14d,r12d
        xor     esi,ebx
        xor     r14d,r13d
        lea     r11d,[rsi*1+r11]
        mov     r12d,r8d
        add     r10d,DWORD[((4+64))+rsp]
        and     r12d,edx
        rorx    r13d,edx,25
        rorx    esi,edx,11
        lea     r11d,[r14*1+r11]
        lea     r10d,[r12*1+r10]
        andn    r12d,edx,r9d
        xor     r13d,esi
        rorx    r14d,edx,6
        lea     r10d,[r12*1+r10]
        xor     r13d,r14d
        mov     esi,r11d
        rorx    r12d,r11d,22
        lea     r10d,[r13*1+r10]
        xor     esi,eax
        rorx    r14d,r11d,13
        rorx    r13d,r11d,2
        lea     ecx,[r10*1+rcx]
        and     r15d,esi
        vpxor   xmm9,xmm9,xmm8
        xor     r14d,r12d
        xor     r15d,eax
        xor     r14d,r13d
        lea     r10d,[r15*1+r10]
        mov     r12d,edx
        add     r9d,DWORD[((8+64))+rsp]
        and     r12d,ecx
        rorx    r13d,ecx,25
        rorx    r15d,ecx,11
        lea     r10d,[r14*1+r10]
        lea     r9d,[r12*1+r9]
        andn    r12d,ecx,r8d
        xor     r13d,r15d
        rorx    r14d,ecx,6
        lea     r9d,[r12*1+r9]
        xor     r13d,r14d
        mov     r15d,r10d
        rorx    r12d,r10d,22
        lea     r9d,[r13*1+r9]
        xor     r15d,r11d
        rorx    r14d,r10d,13
        rorx    r13d,r10d,2
        lea     ebx,[r9*1+rbx]
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((32-128))+rdi]
        xor     r14d,r12d
        xor     esi,r11d
        xor     r14d,r13d
        lea     r9d,[rsi*1+r9]
        mov     r12d,ecx
        add     r8d,DWORD[((12+64))+rsp]
        and     r12d,ebx
        rorx    r13d,ebx,25
        rorx    esi,ebx,11
        lea     r9d,[r14*1+r9]
        lea     r8d,[r12*1+r8]
        andn    r12d,ebx,edx
        xor     r13d,esi
        rorx    r14d,ebx,6
        lea     r8d,[r12*1+r8]
        xor     r13d,r14d
        mov     esi,r9d
        rorx    r12d,r9d,22
        lea     r8d,[r13*1+r8]
        xor     esi,r10d
        rorx    r14d,r9d,13
        rorx    r13d,r9d,2
        lea     eax,[r8*1+rax]
        and     r15d,esi
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((48-128))+rdi]
        xor     r14d,r12d
        xor     r15d,r10d
        xor     r14d,r13d
        lea     r8d,[r15*1+r8]
        mov     r12d,ebx
        add     edx,DWORD[((32+64))+rsp]
        and     r12d,eax
        rorx    r13d,eax,25
        rorx    r15d,eax,11
        lea     r8d,[r14*1+r8]
        lea     edx,[r12*1+rdx]
        andn    r12d,eax,ecx
        xor     r13d,r15d
        rorx    r14d,eax,6
        lea     edx,[r12*1+rdx]
        xor     r13d,r14d
        mov     r15d,r8d
        rorx    r12d,r8d,22
        lea     edx,[r13*1+rdx]
        xor     r15d,r9d
        rorx    r14d,r8d,13
        rorx    r13d,r8d,2
        lea     r11d,[rdx*1+r11]
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((64-128))+rdi]
        xor     r14d,r12d
        xor     esi,r9d
        xor     r14d,r13d
        lea     edx,[rsi*1+rdx]
        mov     r12d,eax
        add     ecx,DWORD[((36+64))+rsp]
        and     r12d,r11d
        rorx    r13d,r11d,25
        rorx    esi,r11d,11
        lea     edx,[r14*1+rdx]
        lea     ecx,[r12*1+rcx]
        andn    r12d,r11d,ebx
        xor     r13d,esi
        rorx    r14d,r11d,6
        lea     ecx,[r12*1+rcx]
        xor     r13d,r14d
        mov     esi,edx
        rorx    r12d,edx,22
        lea     ecx,[r13*1+rcx]
        xor     esi,r8d
        rorx    r14d,edx,13
        rorx    r13d,edx,2
        lea     r10d,[rcx*1+r10]
        and     r15d,esi
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((80-128))+rdi]
        xor     r14d,r12d
        xor     r15d,r8d
        xor     r14d,r13d
        lea     ecx,[r15*1+rcx]
        mov     r12d,r11d
        add     ebx,DWORD[((40+64))+rsp]
        and     r12d,r10d
        rorx    r13d,r10d,25
        rorx    r15d,r10d,11
        lea     ecx,[r14*1+rcx]
        lea     ebx,[r12*1+rbx]
        andn    r12d,r10d,eax
        xor     r13d,r15d
        rorx    r14d,r10d,6
        lea     ebx,[r12*1+rbx]
        xor     r13d,r14d
        mov     r15d,ecx
        rorx    r12d,ecx,22
        lea     ebx,[r13*1+rbx]
        xor     r15d,edx
        rorx    r14d,ecx,13
        rorx    r13d,ecx,2
        lea     r9d,[rbx*1+r9]
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((96-128))+rdi]
        xor     r14d,r12d
        xor     esi,edx
        xor     r14d,r13d
        lea     ebx,[rsi*1+rbx]
        mov     r12d,r10d
        add     eax,DWORD[((44+64))+rsp]
        and     r12d,r9d
        rorx    r13d,r9d,25
        rorx    esi,r9d,11
        lea     ebx,[r14*1+rbx]
        lea     eax,[r12*1+rax]
        andn    r12d,r9d,r11d
        xor     r13d,esi
        rorx    r14d,r9d,6
        lea     eax,[r12*1+rax]
        xor     r13d,r14d
        mov     esi,ebx
        rorx    r12d,ebx,22
        lea     eax,[r13*1+rax]
        xor     esi,ecx
        rorx    r14d,ebx,13
        rorx    r13d,ebx,2
        lea     r8d,[rax*1+r8]
        and     r15d,esi
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((112-128))+rdi]
        xor     r14d,r12d
        xor     r15d,ecx
        xor     r14d,r13d
        lea     eax,[r15*1+rax]
        mov     r12d,r9d
        add     r11d,DWORD[rsp]
        and     r12d,r8d
        rorx    r13d,r8d,25
        rorx    r15d,r8d,11
        lea     eax,[r14*1+rax]
        lea     r11d,[r12*1+r11]
        andn    r12d,r8d,r10d
        xor     r13d,r15d
        rorx    r14d,r8d,6
        lea     r11d,[r12*1+r11]
        xor     r13d,r14d
        mov     r15d,eax
        rorx    r12d,eax,22
        lea     r11d,[r13*1+r11]
        xor     r15d,ebx
        rorx    r14d,eax,13
        rorx    r13d,eax,2
        lea     edx,[r11*1+rdx]
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((128-128))+rdi]
        xor     r14d,r12d
        xor     esi,ebx
        xor     r14d,r13d
        lea     r11d,[rsi*1+r11]
        mov     r12d,r8d
        add     r10d,DWORD[4+rsp]
        and     r12d,edx
        rorx    r13d,edx,25
        rorx    esi,edx,11
        lea     r11d,[r14*1+r11]
        lea     r10d,[r12*1+r10]
        andn    r12d,edx,r9d
        xor     r13d,esi
        rorx    r14d,edx,6
        lea     r10d,[r12*1+r10]
        xor     r13d,r14d
        mov     esi,r11d
        rorx    r12d,r11d,22
        lea     r10d,[r13*1+r10]
        xor     esi,eax
        rorx    r14d,r11d,13
        rorx    r13d,r11d,2
        lea     ecx,[r10*1+rcx]
        and     r15d,esi
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((144-128))+rdi]
        xor     r14d,r12d
        xor     r15d,eax
        xor     r14d,r13d
        lea     r10d,[r15*1+r10]
        mov     r12d,edx
        add     r9d,DWORD[8+rsp]
        and     r12d,ecx
        rorx    r13d,ecx,25
        rorx    r15d,ecx,11
        lea     r10d,[r14*1+r10]
        lea     r9d,[r12*1+r9]
        andn    r12d,ecx,r8d
        xor     r13d,r15d
        rorx    r14d,ecx,6
        lea     r9d,[r12*1+r9]
        xor     r13d,r14d
        mov     r15d,r10d
        rorx    r12d,r10d,22
        lea     r9d,[r13*1+r9]
        xor     r15d,r11d
        rorx    r14d,r10d,13
        rorx    r13d,r10d,2
        lea     ebx,[r9*1+rbx]
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((160-128))+rdi]
        xor     r14d,r12d
        xor     esi,r11d
        xor     r14d,r13d
        lea     r9d,[rsi*1+r9]
        mov     r12d,ecx
        add     r8d,DWORD[12+rsp]
        and     r12d,ebx
        rorx    r13d,ebx,25
        rorx    esi,ebx,11
        lea     r9d,[r14*1+r9]
        lea     r8d,[r12*1+r8]
        andn    r12d,ebx,edx
        xor     r13d,esi
        rorx    r14d,ebx,6
        lea     r8d,[r12*1+r8]
        xor     r13d,r14d
        mov     esi,r9d
        rorx    r12d,r9d,22
        lea     r8d,[r13*1+r8]
        xor     esi,r10d
        rorx    r14d,r9d,13
        rorx    r13d,r9d,2
        lea     eax,[r8*1+rax]
        and     r15d,esi
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((176-128))+rdi]
        xor     r14d,r12d
        xor     r15d,r10d
        xor     r14d,r13d
        lea     r8d,[r15*1+r8]
        mov     r12d,ebx
        add     edx,DWORD[32+rsp]
        and     r12d,eax
        rorx    r13d,eax,25
        rorx    r15d,eax,11
        lea     r8d,[r14*1+r8]
        lea     edx,[r12*1+rdx]
        andn    r12d,eax,ecx
        xor     r13d,r15d
        rorx    r14d,eax,6
        lea     edx,[r12*1+rdx]
        xor     r13d,r14d
        mov     r15d,r8d
        rorx    r12d,r8d,22
        lea     edx,[r13*1+rdx]
        xor     r15d,r9d
        rorx    r14d,r8d,13
        rorx    r13d,r8d,2
        lea     r11d,[rdx*1+r11]
        and     esi,r15d
        vpand   xmm8,xmm11,xmm12
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((192-128))+rdi]
        xor     r14d,r12d
        xor     esi,r9d
        xor     r14d,r13d
        lea     edx,[rsi*1+rdx]
        mov     r12d,eax
        add     ecx,DWORD[36+rsp]
        and     r12d,r11d
        rorx    r13d,r11d,25
        rorx    esi,r11d,11
        lea     edx,[r14*1+rdx]
        lea     ecx,[r12*1+rcx]
        andn    r12d,r11d,ebx
        xor     r13d,esi
        rorx    r14d,r11d,6
        lea     ecx,[r12*1+rcx]
        xor     r13d,r14d
        mov     esi,edx
        rorx    r12d,edx,22
        lea     ecx,[r13*1+rcx]
        xor     esi,r8d
        rorx    r14d,edx,13
        rorx    r13d,edx,2
        lea     r10d,[rcx*1+r10]
        and     r15d,esi
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((208-128))+rdi]
        xor     r14d,r12d
        xor     r15d,r8d
        xor     r14d,r13d
        lea     ecx,[r15*1+rcx]
        mov     r12d,r11d
        add     ebx,DWORD[40+rsp]
        and     r12d,r10d
        rorx    r13d,r10d,25
        rorx    r15d,r10d,11
        lea     ecx,[r14*1+rcx]
        lea     ebx,[r12*1+rbx]
        andn    r12d,r10d,eax
        xor     r13d,r15d
        rorx    r14d,r10d,6
        lea     ebx,[r12*1+rbx]
        xor     r13d,r14d
        mov     r15d,ecx
        rorx    r12d,ecx,22
        lea     ebx,[r13*1+rbx]
        xor     r15d,edx
        rorx    r14d,ecx,13
        rorx    r13d,ecx,2
        lea     r9d,[rbx*1+r9]
        and     esi,r15d
        vpand   xmm11,xmm11,xmm13
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((224-128))+rdi]
        xor     r14d,r12d
        xor     esi,edx
        xor     r14d,r13d
        lea     ebx,[rsi*1+rbx]
        mov     r12d,r10d
        add     eax,DWORD[44+rsp]
        and     r12d,r9d
        rorx    r13d,r9d,25
        rorx    esi,r9d,11
        lea     ebx,[r14*1+rbx]
        lea     eax,[r12*1+rax]
        andn    r12d,r9d,r11d
        xor     r13d,esi
        rorx    r14d,r9d,6
        lea     eax,[r12*1+rax]
        xor     r13d,r14d
        mov     esi,ebx
        rorx    r12d,ebx,22
        lea     eax,[r13*1+rax]
        xor     esi,ecx
        rorx    r14d,ebx,13
        rorx    r13d,ebx,2
        lea     r8d,[rax*1+r8]
        and     r15d,esi
        vpor    xmm8,xmm8,xmm11
        vaesenclast     xmm11,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((0-128))+rdi]
        xor     r14d,r12d
        xor     r15d,ecx
        xor     r14d,r13d
        lea     eax,[r15*1+rax]
        mov     r12d,r9d
        vpextrq r12,xmm15,1
        vmovq   r13,xmm15
        mov     r15,QWORD[552+rsp]
        add     eax,r14d
        lea     rbp,[448+rsp]

        vpand   xmm11,xmm11,xmm14
        vpor    xmm8,xmm8,xmm11
        vmovdqu XMMWORD[r13*1+r12],xmm8
        lea     r13,[16+r13]

        add     eax,DWORD[r15]
        add     ebx,DWORD[4+r15]
        add     ecx,DWORD[8+r15]
        add     edx,DWORD[12+r15]
        add     r8d,DWORD[16+r15]
        add     r9d,DWORD[20+r15]
        add     r10d,DWORD[24+r15]
        add     r11d,DWORD[28+r15]

        mov     DWORD[r15],eax
        mov     DWORD[4+r15],ebx
        mov     DWORD[8+r15],ecx
        mov     DWORD[12+r15],edx
        mov     DWORD[16+r15],r8d
        mov     DWORD[20+r15],r9d
        mov     DWORD[24+r15],r10d
        mov     DWORD[28+r15],r11d

        cmp     r13,QWORD[80+rbp]
        je      NEAR $L$done_avx2

        xor     r14d,r14d
        mov     esi,ebx
        mov     r12d,r9d
        xor     esi,ecx
        jmp     NEAR $L$ower_avx2
ALIGN   16
; ---------------------------------------------------------------------
; $L$ower_avx2 -- one iteration = 16 SHA-256 rounds interleaved with the
; AES-CBC encryption of one 16-byte block.
;
; Register roles as used below:
;   eax,ebx,ecx,edx,r8d,r9d,r10d,r11d  eight SHA-256 working variables
;       (a..h, in the order the state words are later added back).
;   r12d,r13d,r14d,r15d,esi            round scratch.  r14d carries a
;       deferred sum that is folded into the next round's "a" register by
;       the first lea of that round; esi/r15d alternate as the (a^b) term.
;   rbp   points into the stack frame; each round adds one dword read at
;         a +16 skewed offset (16..28 and 48..60 of a 64-byte row).
;         NOTE(review): these look like precomputed W+K words for the
;         second of two hashed blocks -- confirm against the code that
;         fills the frame (outside this chunk).
;   rdi   AES round-key base biased by +128 (keys read at (16*i-128)+rdi).
;   xmm9  AES state for the block in flight, xmm10 current round key,
;   xmm8  previous ciphertext block / chaining value (also the output),
;   xmm11 scratch for the three candidate "last round" results,
;   xmm12,xmm13,xmm14  masks ANDed with those candidates -- presumably
;         exactly one is all-ones, selected by key length; set up outside
;         this chunk, TODO confirm.
;   xmm15 low qword = saved input pointer, high qword = value added to it
;         to form the output address.
;
; Entry conditions established by the code just before the jump here and
; re-established at the bottom of the loop: r14d = pending term (0 on
; first entry), esi = ebx ^ ecx, r12d = r9d, xmm10 = round key at
; (0-128)+rdi.
;
; The loop repeats while rbp >= rsp (unsigned); rbp drops by 128 per
; iteration.  Given the rbp = rsp+448 set just before the jump into this
; loop, that is four iterations (64 rounds, four 16-byte blocks).
; ---------------------------------------------------------------------
$L$ower_avx2:
        ; Fetch the next 16 input bytes and stash the input pointer in
        ; xmm15[63:0], because r13 is reused as round scratch below.
        vmovdqu xmm9,XMMWORD[r13]
        vpinsrq xmm15,xmm15,r13,0
        ; ---- round: h(r11d) += word; rorx 25/11/6 of e form Sigma1,
        ;      and/andn form Ch(e,f,g), rorx 22/13/2 of a form Sigma0.
        add     r11d,DWORD[((0+16))+rbp]
        and     r12d,r8d
        rorx    r13d,r8d,25
        rorx    r15d,r8d,11
        lea     eax,[r14*1+rax]
        lea     r11d,[r12*1+r11]
        andn    r12d,r8d,r10d
        xor     r13d,r15d
        rorx    r14d,r8d,6
        lea     r11d,[r12*1+r11]
        xor     r13d,r14d
        mov     r15d,eax
        rorx    r12d,eax,22
        lea     r11d,[r13*1+r11]
        xor     r15d,ebx
        rorx    r14d,eax,13
        rorx    r13d,eax,2
        lea     edx,[r11*1+rdx]
        and     esi,r15d
        ; AES: xor the input block with the round key held in xmm10
        ; (loaded from (0-128)+rdi before entry / at the loop bottom).
        vpxor   xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((16-128))+rdi]
        xor     r14d,r12d
        xor     esi,ebx
        xor     r14d,r13d
        lea     r11d,[rsi*1+r11]
        mov     r12d,r8d
        ; ---- round: g(r10d)
        add     r10d,DWORD[((4+16))+rbp]
        and     r12d,edx
        rorx    r13d,edx,25
        rorx    esi,edx,11
        lea     r11d,[r14*1+r11]
        lea     r10d,[r12*1+r10]
        andn    r12d,edx,r9d
        xor     r13d,esi
        rorx    r14d,edx,6
        lea     r10d,[r12*1+r10]
        xor     r13d,r14d
        mov     esi,r11d
        rorx    r12d,r11d,22
        lea     r10d,[r13*1+r10]
        xor     esi,eax
        rorx    r14d,r11d,13
        rorx    r13d,r11d,2
        lea     ecx,[r10*1+rcx]
        and     r15d,esi
        ; CBC chaining: xor in the previous ciphertext block (xmm8).
        vpxor   xmm9,xmm9,xmm8
        xor     r14d,r12d
        xor     r15d,eax
        xor     r14d,r13d
        lea     r10d,[r15*1+r10]
        mov     r12d,edx
        ; ---- round: f(r9d)
        add     r9d,DWORD[((8+16))+rbp]
        and     r12d,ecx
        rorx    r13d,ecx,25
        rorx    r15d,ecx,11
        lea     r10d,[r14*1+r10]
        lea     r9d,[r12*1+r9]
        andn    r12d,ecx,r8d
        xor     r13d,r15d
        rorx    r14d,ecx,6
        lea     r9d,[r12*1+r9]
        xor     r13d,r14d
        mov     r15d,r10d
        rorx    r12d,r10d,22
        lea     r9d,[r13*1+r9]
        xor     r15d,r11d
        rorx    r14d,r10d,13
        rorx    r13d,r10d,2
        lea     ebx,[r9*1+rbx]
        and     esi,r15d
        ; From here on: one AES round per SHA round, each followed by the
        ; load of the next round key.
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((32-128))+rdi]
        xor     r14d,r12d
        xor     esi,r11d
        xor     r14d,r13d
        lea     r9d,[rsi*1+r9]
        mov     r12d,ecx
        ; ---- round: e(r8d)
        add     r8d,DWORD[((12+16))+rbp]
        and     r12d,ebx
        rorx    r13d,ebx,25
        rorx    esi,ebx,11
        lea     r9d,[r14*1+r9]
        lea     r8d,[r12*1+r8]
        andn    r12d,ebx,edx
        xor     r13d,esi
        rorx    r14d,ebx,6
        lea     r8d,[r12*1+r8]
        xor     r13d,r14d
        mov     esi,r9d
        rorx    r12d,r9d,22
        lea     r8d,[r13*1+r8]
        xor     esi,r10d
        rorx    r14d,r9d,13
        rorx    r13d,r9d,2
        lea     eax,[r8*1+rax]
        and     r15d,esi
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((48-128))+rdi]
        xor     r14d,r12d
        xor     r15d,r10d
        xor     r14d,r13d
        lea     r8d,[r15*1+r8]
        mov     r12d,ebx
        ; ---- round: d(edx)
        add     edx,DWORD[((32+16))+rbp]
        and     r12d,eax
        rorx    r13d,eax,25
        rorx    r15d,eax,11
        lea     r8d,[r14*1+r8]
        lea     edx,[r12*1+rdx]
        andn    r12d,eax,ecx
        xor     r13d,r15d
        rorx    r14d,eax,6
        lea     edx,[r12*1+rdx]
        xor     r13d,r14d
        mov     r15d,r8d
        rorx    r12d,r8d,22
        lea     edx,[r13*1+rdx]
        xor     r15d,r9d
        rorx    r14d,r8d,13
        rorx    r13d,r8d,2
        lea     r11d,[rdx*1+r11]
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((64-128))+rdi]
        xor     r14d,r12d
        xor     esi,r9d
        xor     r14d,r13d
        lea     edx,[rsi*1+rdx]
        mov     r12d,eax
        ; ---- round: c(ecx)
        add     ecx,DWORD[((36+16))+rbp]
        and     r12d,r11d
        rorx    r13d,r11d,25
        rorx    esi,r11d,11
        lea     edx,[r14*1+rdx]
        lea     ecx,[r12*1+rcx]
        andn    r12d,r11d,ebx
        xor     r13d,esi
        rorx    r14d,r11d,6
        lea     ecx,[r12*1+rcx]
        xor     r13d,r14d
        mov     esi,edx
        rorx    r12d,edx,22
        lea     ecx,[r13*1+rcx]
        xor     esi,r8d
        rorx    r14d,edx,13
        rorx    r13d,edx,2
        lea     r10d,[rcx*1+r10]
        and     r15d,esi
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((80-128))+rdi]
        xor     r14d,r12d
        xor     r15d,r8d
        xor     r14d,r13d
        lea     ecx,[r15*1+rcx]
        mov     r12d,r11d
        ; ---- round: b(ebx)
        add     ebx,DWORD[((40+16))+rbp]
        and     r12d,r10d
        rorx    r13d,r10d,25
        rorx    r15d,r10d,11
        lea     ecx,[r14*1+rcx]
        lea     ebx,[r12*1+rbx]
        andn    r12d,r10d,eax
        xor     r13d,r15d
        rorx    r14d,r10d,6
        lea     ebx,[r12*1+rbx]
        xor     r13d,r14d
        mov     r15d,ecx
        rorx    r12d,ecx,22
        lea     ebx,[r13*1+rbx]
        xor     r15d,edx
        rorx    r14d,ecx,13
        rorx    r13d,ecx,2
        lea     r9d,[rbx*1+r9]
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((96-128))+rdi]
        xor     r14d,r12d
        xor     esi,edx
        xor     r14d,r13d
        lea     ebx,[rsi*1+rbx]
        mov     r12d,r10d
        ; ---- round: a(eax)
        add     eax,DWORD[((44+16))+rbp]
        and     r12d,r9d
        rorx    r13d,r9d,25
        rorx    esi,r9d,11
        lea     ebx,[r14*1+rbx]
        lea     eax,[r12*1+rax]
        andn    r12d,r9d,r11d
        xor     r13d,esi
        rorx    r14d,r9d,6
        lea     eax,[r12*1+rax]
        xor     r13d,r14d
        mov     esi,ebx
        rorx    r12d,ebx,22
        lea     eax,[r13*1+rax]
        xor     esi,ecx
        rorx    r14d,ebx,13
        rorx    r13d,ebx,2
        lea     r8d,[rax*1+r8]
        and     r15d,esi
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((112-128))+rdi]
        xor     r14d,r12d
        xor     r15d,ecx
        xor     r14d,r13d
        lea     eax,[r15*1+rax]
        mov     r12d,r9d
        ; Step down to the next 64-byte row of the frame for rounds 9..16
        ; of this iteration (lea leaves the flags untouched).
        lea     rbp,[((-64))+rbp]
        ; ---- round: h(r11d)
        add     r11d,DWORD[((0+16))+rbp]
        and     r12d,r8d
        rorx    r13d,r8d,25
        rorx    r15d,r8d,11
        lea     eax,[r14*1+rax]
        lea     r11d,[r12*1+r11]
        andn    r12d,r8d,r10d
        xor     r13d,r15d
        rorx    r14d,r8d,6
        lea     r11d,[r12*1+r11]
        xor     r13d,r14d
        mov     r15d,eax
        rorx    r12d,eax,22
        lea     r11d,[r13*1+r11]
        xor     r15d,ebx
        rorx    r14d,eax,13
        rorx    r13d,eax,2
        lea     edx,[r11*1+rdx]
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((128-128))+rdi]
        xor     r14d,r12d
        xor     esi,ebx
        xor     r14d,r13d
        lea     r11d,[rsi*1+r11]
        mov     r12d,r8d
        ; ---- round: g(r10d)
        add     r10d,DWORD[((4+16))+rbp]
        and     r12d,edx
        rorx    r13d,edx,25
        rorx    esi,edx,11
        lea     r11d,[r14*1+r11]
        lea     r10d,[r12*1+r10]
        andn    r12d,edx,r9d
        xor     r13d,esi
        rorx    r14d,edx,6
        lea     r10d,[r12*1+r10]
        xor     r13d,r14d
        mov     esi,r11d
        rorx    r12d,r11d,22
        lea     r10d,[r13*1+r10]
        xor     esi,eax
        rorx    r14d,r11d,13
        rorx    r13d,r11d,2
        lea     ecx,[r10*1+rcx]
        and     r15d,esi
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((144-128))+rdi]
        xor     r14d,r12d
        xor     r15d,eax
        xor     r14d,r13d
        lea     r10d,[r15*1+r10]
        mov     r12d,edx
        ; ---- round: f(r9d)
        add     r9d,DWORD[((8+16))+rbp]
        and     r12d,ecx
        rorx    r13d,ecx,25
        rorx    r15d,ecx,11
        lea     r10d,[r14*1+r10]
        lea     r9d,[r12*1+r9]
        andn    r12d,ecx,r8d
        xor     r13d,r15d
        rorx    r14d,ecx,6
        lea     r9d,[r12*1+r9]
        xor     r13d,r14d
        mov     r15d,r10d
        rorx    r12d,r10d,22
        lea     r9d,[r13*1+r9]
        xor     r15d,r11d
        rorx    r14d,r10d,13
        rorx    r13d,r10d,2
        lea     ebx,[r9*1+rbx]
        and     esi,r15d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((160-128))+rdi]
        xor     r14d,r12d
        xor     esi,r11d
        xor     r14d,r13d
        lea     r9d,[rsi*1+r9]
        mov     r12d,ecx
        ; ---- round: e(r8d)
        add     r8d,DWORD[((12+16))+rbp]
        and     r12d,ebx
        rorx    r13d,ebx,25
        rorx    esi,ebx,11
        lea     r9d,[r14*1+r9]
        lea     r8d,[r12*1+r8]
        andn    r12d,ebx,edx
        xor     r13d,esi
        rorx    r14d,ebx,6
        lea     r8d,[r12*1+r8]
        xor     r13d,r14d
        mov     esi,r9d
        rorx    r12d,r9d,22
        lea     r8d,[r13*1+r8]
        xor     esi,r10d
        rorx    r14d,r9d,13
        rorx    r13d,r9d,2
        lea     eax,[r8*1+rax]
        and     r15d,esi
        ; Candidate #1: treat the key at offset 160 (11th key) as the last
        ; round key, while also continuing with a normal round on xmm9.
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((176-128))+rdi]
        xor     r14d,r12d
        xor     r15d,r10d
        xor     r14d,r13d
        lea     r8d,[r15*1+r8]
        mov     r12d,ebx
        ; ---- round: d(edx)
        add     edx,DWORD[((32+16))+rbp]
        and     r12d,eax
        rorx    r13d,eax,25
        rorx    r15d,eax,11
        lea     r8d,[r14*1+r8]
        lea     edx,[r12*1+rdx]
        andn    r12d,eax,ecx
        xor     r13d,r15d
        rorx    r14d,eax,6
        lea     edx,[r12*1+rdx]
        xor     r13d,r14d
        mov     r15d,r8d
        rorx    r12d,r8d,22
        lea     edx,[r13*1+rdx]
        xor     r15d,r9d
        rorx    r14d,r8d,13
        rorx    r13d,r8d,2
        lea     r11d,[rdx*1+r11]
        and     esi,r15d
        ; xmm8 = candidate #1 masked by xmm12 (starts the branch-free
        ; selection of the final ciphertext block).
        vpand   xmm8,xmm11,xmm12
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((192-128))+rdi]
        xor     r14d,r12d
        xor     esi,r9d
        xor     r14d,r13d
        lea     edx,[rsi*1+rdx]
        mov     r12d,eax
        ; ---- round: c(ecx)
        add     ecx,DWORD[((36+16))+rbp]
        and     r12d,r11d
        rorx    r13d,r11d,25
        rorx    esi,r11d,11
        lea     edx,[r14*1+rdx]
        lea     ecx,[r12*1+rcx]
        andn    r12d,r11d,ebx
        xor     r13d,esi
        rorx    r14d,r11d,6
        lea     ecx,[r12*1+rcx]
        xor     r13d,r14d
        mov     esi,edx
        rorx    r12d,edx,22
        lea     ecx,[r13*1+rcx]
        xor     esi,r8d
        rorx    r14d,edx,13
        rorx    r13d,edx,2
        lea     r10d,[rcx*1+r10]
        and     r15d,esi
        ; Candidate #2: key at offset 192 (13th key) as the last round key.
        vaesenclast     xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((208-128))+rdi]
        xor     r14d,r12d
        xor     r15d,r8d
        xor     r14d,r13d
        lea     ecx,[r15*1+rcx]
        mov     r12d,r11d
        ; ---- round: b(ebx)
        add     ebx,DWORD[((40+16))+rbp]
        and     r12d,r10d
        rorx    r13d,r10d,25
        rorx    r15d,r10d,11
        lea     ecx,[r14*1+rcx]
        lea     ebx,[r12*1+rbx]
        andn    r12d,r10d,eax
        xor     r13d,r15d
        rorx    r14d,r10d,6
        lea     ebx,[r12*1+rbx]
        xor     r13d,r14d
        mov     r15d,ecx
        rorx    r12d,ecx,22
        lea     ebx,[r13*1+rbx]
        xor     r15d,edx
        rorx    r14d,ecx,13
        rorx    r13d,ecx,2
        lea     r9d,[rbx*1+r9]
        and     esi,r15d
        ; Mask candidate #2 with xmm13 (merged into xmm8 one round later).
        vpand   xmm11,xmm11,xmm13
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((224-128))+rdi]
        xor     r14d,r12d
        xor     esi,edx
        xor     r14d,r13d
        lea     ebx,[rsi*1+rbx]
        mov     r12d,r10d
        ; ---- round: a(eax)
        add     eax,DWORD[((44+16))+rbp]
        and     r12d,r9d
        rorx    r13d,r9d,25
        rorx    esi,r9d,11
        lea     ebx,[r14*1+rbx]
        lea     eax,[r12*1+rax]
        andn    r12d,r9d,r11d
        xor     r13d,esi
        rorx    r14d,r9d,6
        lea     eax,[r12*1+rax]
        xor     r13d,r14d
        mov     esi,ebx
        rorx    r12d,ebx,22
        lea     eax,[r13*1+rax]
        xor     esi,ecx
        rorx    r14d,ebx,13
        rorx    r13d,ebx,2
        lea     r8d,[rax*1+r8]
        and     r15d,esi
        ; Merge masked candidate #2, then form candidate #3 with the key at
        ; offset 224 (15th key); reload the first round key for the next
        ; block.
        vpor    xmm8,xmm8,xmm11
        vaesenclast     xmm11,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((0-128))+rdi]
        xor     r14d,r12d
        xor     r15d,ecx
        xor     r14d,r13d
        lea     eax,[r15*1+rax]
        mov     r12d,r9d
        ; Recover the input pointer (r13) and the output displacement (r15)
        ; from xmm15, finish the masked selection and store the ciphertext
        ; block at [input pointer + displacement].
        vmovq   r13,xmm15
        vpextrq r15,xmm15,1
        vpand   xmm11,xmm11,xmm14
        vpor    xmm8,xmm8,xmm11
        lea     rbp,[((-64))+rbp]
        vmovdqu XMMWORD[r13*1+r15],xmm8
        lea     r13,[16+r13]
        ; Continue while the frame cursor has not dropped below rsp
        ; (unsigned compare).
        cmp     rbp,rsp
        jae     NEAR $L$ower_avx2

        ; ---- After the $L$ower_avx2 loop: fold the working variables back
        ; into the 8-dword state at [r15] and decide whether to loop again.
        ; r15 and rsi are reloaded from frame slots at rsp+552 / rsp+560
        ; (written by code outside this chunk; r15 is used as the state
        ; pointer below).
        mov     r15,QWORD[552+rsp]
        ; Advance the input pointer by a further 64 bytes.
        lea     r13,[64+r13]
        mov     rsi,QWORD[560+rsp]
        ; Add the term still pending in r14d from the final round.
        add     eax,r14d
        ; Drop the 448-byte area below the frame; the offsets used from
        ; here on are relative to the new rsp.
        lea     rsp,[448+rsp]

        ; state[i] += working variable i (stores are interleaved below).
        add     eax,DWORD[r15]
        add     ebx,DWORD[4+r15]
        add     ecx,DWORD[8+r15]
        add     edx,DWORD[12+r15]
        add     r8d,DWORD[16+r15]
        add     r9d,DWORD[20+r15]
        add     r10d,DWORD[24+r15]
        ; r12 = r13 + rsi -- NOTE(review): presumably the address of the
        ; following input block; confirm against $L$oop_avx2.
        lea     r12,[r13*1+rsi]
        add     r11d,DWORD[28+r15]

        ; Compare the input pointer with the limit kept at [80+rsp].  The
        ; flags set here are consumed by both the cmove and the jbe below:
        ; none of the intervening mov/cmove instructions modifies flags.
        cmp     r13,QWORD[((64+16))+rsp]

        mov     DWORD[r15],eax
        ; If r13 equals the limit, point r12 at rsp instead.
        cmove   r12,rsp
        mov     DWORD[4+r15],ebx
        mov     DWORD[8+r15],ecx
        mov     DWORD[12+r15],edx
        mov     DWORD[16+r15],r8d
        mov     DWORD[20+r15],r9d
        mov     DWORD[24+r15],r10d
        mov     DWORD[28+r15],r11d

        ; r13 <= limit (unsigned): go round the main loop again.
        jbe     NEAR $L$oop_avx2
        ; Otherwise fall into the exit path, which addresses the frame via
        ; rbp.
        lea     rbp,[rsp]




; ---- Exit path.  rbp is the frame base here (rsp+448 on the early exit
; taken from the code above, rsp itself on the fall-through).
$L$done_avx2:
        ; r8  = pointer from frame slot +96; the final chaining block
        ;       (xmm8) is written there -- presumably the caller's IV
        ;       buffer, TODO confirm against the prologue.
        ; rsi = value from frame slot +120; it becomes rsp below, i.e. the
        ;       stack pointer saved by the prologue (not visible here).
        mov     r8,QWORD[((64+32))+rbp]
        mov     rsi,QWORD[((64+56))+rbp]

        vmovdqu XMMWORD[r8],xmm8
        ; Clear every vector register before the xmm6-xmm15 contents are
        ; reloaded from the frame.
        vzeroall
        ; Restore xmm6-xmm15 (callee-saved in the Microsoft x64 ABI) from
        ; frame offsets 128..272.
        movaps  xmm6,XMMWORD[128+rbp]
        movaps  xmm7,XMMWORD[144+rbp]
        movaps  xmm8,XMMWORD[160+rbp]
        movaps  xmm9,XMMWORD[176+rbp]
        movaps  xmm10,XMMWORD[192+rbp]
        movaps  xmm11,XMMWORD[208+rbp]
        movaps  xmm12,XMMWORD[224+rbp]
        movaps  xmm13,XMMWORD[240+rbp]
        movaps  xmm14,XMMWORD[256+rbp]
        movaps  xmm15,XMMWORD[272+rbp]
        ; Restore the callee-saved general-purpose registers from the six
        ; qwords just below the saved stack pointer.
        mov     r15,QWORD[((-48))+rsi]

        mov     r14,QWORD[((-40))+rsi]

        mov     r13,QWORD[((-32))+rsi]

        mov     r12,QWORD[((-24))+rsi]

        mov     rbp,QWORD[((-16))+rsi]

        mov     rbx,QWORD[((-8))+rsi]

        ; Release the whole frame.
        lea     rsp,[rsi]

$L$epilogue_avx2:
        ; Reload rdi/rsi from the two qwords above the return address.
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret

$L$SEH_end_aesni_cbc_sha256_enc_avx2:

ALIGN   32
; ---------------------------------------------------------------------
; aesni_cbc_sha256_enc_shaext -- entry, Win64 prologue and state setup.
;
; Reached from the aesni_cbc_sha256_enc dispatcher when bit 61 of the
; qword at OPENSSL_ia32cap_P+4 is set.  Arguments arrive in Microsoft
; x64 order (rcx, rdx, r8, r9, then stack) and are moved into
; rdi, rsi, rdx, rcx, r8, r9 and r10 for the body.
;
; Register roles after this setup, as used by the loop that follows:
;   rdi  = arg1, rsi = arg2 - arg1 (so [rdi+rsi] addresses arg2's buffer),
;   rcx  = arg4 + 112 (biased AES round-key pointer),
;   r11d = dword at [arg4+240] (compared against 11 in the loop --
;          presumably the AES round count, TODO confirm),
;   r10  = arg7 (64-byte blocks are read from it in the loop),
;   rax  = K256+128 (biased constant-table pointer),
;   xmm15 = first round key, xmm4 = second round key, xmm6 = 16 bytes
;          loaded from [arg5] (presumably the CBC IV, TODO confirm),
;   xmm1/xmm2 = the 32 bytes at [arg6], rearranged below,
;   xmm3 = 16 bytes at K256+512 (used as the pshufb control in the loop),
;          with a spare copy in xmm7.
; ---------------------------------------------------------------------
aesni_cbc_sha256_enc_shaext:
        ; rdi and rsi are callee-saved in the Microsoft x64 ABI; park them
        ; in the two qwords above the return address.
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        ; rax = rsp at entry; used as the base for the xmm saves below.
        mov     rax,rsp
$L$SEH_begin_aesni_cbc_sha256_enc_shaext:
        ; Shuffle the first six arguments into rdi, rsi, rdx, rcx, r8, r9.
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8
        mov     rcx,r9
        mov     r8,QWORD[40+rsp]
        mov     r9,QWORD[48+rsp]



        ; Seventh argument.
        mov     r10,QWORD[56+rsp]
        ; Reserve 168 bytes and save xmm6-xmm15 (callee-saved on Win64) at
        ; entry_rsp-168 .. entry_rsp-9.  movaps needs 16-byte alignment,
        ; which holds provided rsp was 8 mod 16 at entry, as it is
        ; immediately after a call made with an aligned stack.
        lea     rsp,[((-168))+rsp]
        movaps  XMMWORD[(-8-160)+rax],xmm6
        movaps  XMMWORD[(-8-144)+rax],xmm7
        movaps  XMMWORD[(-8-128)+rax],xmm8
        movaps  XMMWORD[(-8-112)+rax],xmm9
        movaps  XMMWORD[(-8-96)+rax],xmm10
        movaps  XMMWORD[(-8-80)+rax],xmm11
        movaps  XMMWORD[(-8-64)+rax],xmm12
        movaps  XMMWORD[(-8-48)+rax],xmm13
        movaps  XMMWORD[(-8-32)+rax],xmm14
        movaps  XMMWORD[(-8-16)+rax],xmm15
$L$prologue_shaext:
        ; rax = K256+128; later table accesses use (offset-128)+rax.
        lea     rax,[((K256+128))]
        ; Load the 32 bytes at [r9] (arg6).
        movdqu  xmm1,XMMWORD[r9]
        movdqu  xmm2,XMMWORD[16+r9]
        ; xmm3 = 16 bytes at K256+512.
        movdqa  xmm3,XMMWORD[((512-128))+rax]

        mov     r11d,DWORD[240+rcx]
        ; Turn rsi into a displacement relative to rdi.
        sub     rsi,rdi
        movups  xmm15,XMMWORD[rcx]
        movups  xmm6,XMMWORD[r8]
        movups  xmm4,XMMWORD[16+rcx]
        ; Bias the key pointer; later key loads use offsets -80..+112.
        lea     rcx,[112+rcx]

        ; Rearrange the two loaded state vectors into the layout the loop
        ; works on -- NOTE(review): presumably the operand order required
        ; by the SHA rounds instruction; confirm against the Intel SDM.
        pshufd  xmm0,xmm1,0x1b
        pshufd  xmm1,xmm1,0xb1
        pshufd  xmm2,xmm2,0x1b
        ; Keep a copy of xmm3 in xmm7; xmm3 is overwritten as scratch
        ; inside the loop.
        movdqa  xmm7,xmm3
        ; Hand-encoded: 66 0F 3A 0F CA 08 = palignr xmm1,xmm2,8
DB      102,15,58,15,202,8
        punpcklqdq      xmm2,xmm0

        jmp     NEAR $L$oop_shaext

ALIGN   16
$L$oop_shaext:
        movdqu  xmm10,XMMWORD[r10]
        movdqu  xmm11,XMMWORD[16+r10]
        movdqu  xmm12,XMMWORD[32+r10]
DB      102,68,15,56,0,211
        movdqu  xmm13,XMMWORD[48+r10]

        movdqa  xmm0,XMMWORD[((0-128))+rax]
        paddd   xmm0,xmm10
DB      102,68,15,56,0,219
        movdqa  xmm9,xmm2
        movdqa  xmm8,xmm1
        movups  xmm14,XMMWORD[rdi]
        xorps   xmm14,xmm15
        xorps   xmm6,xmm14
        movups  xmm5,XMMWORD[((-80))+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movups  xmm4,XMMWORD[((-64))+rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,202

        movdqa  xmm0,XMMWORD[((32-128))+rax]
        paddd   xmm0,xmm11
DB      102,68,15,56,0,227
        lea     r10,[64+r10]
        movups  xmm5,XMMWORD[((-48))+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movups  xmm4,XMMWORD[((-32))+rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,202

        movdqa  xmm0,XMMWORD[((64-128))+rax]
        paddd   xmm0,xmm12
DB      102,68,15,56,0,235
DB      69,15,56,204,211
        movups  xmm5,XMMWORD[((-16))+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movdqa  xmm3,xmm13
DB      102,65,15,58,15,220,4
        paddd   xmm10,xmm3
        movups  xmm4,XMMWORD[rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,202

        movdqa  xmm0,XMMWORD[((96-128))+rax]
        paddd   xmm0,xmm13
DB      69,15,56,205,213
DB      69,15,56,204,220
        movups  xmm5,XMMWORD[16+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movups  xmm4,XMMWORD[32+rcx]
        aesenc  xmm6,xmm5
        movdqa  xmm3,xmm10
DB      102,65,15,58,15,221,4
        paddd   xmm11,xmm3
DB      15,56,203,202
        movdqa  xmm0,XMMWORD[((128-128))+rax]
        paddd   xmm0,xmm10
DB      69,15,56,205,218
DB      69,15,56,204,229
        movups  xmm5,XMMWORD[48+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movdqa  xmm3,xmm11
DB      102,65,15,58,15,218,4
        paddd   xmm12,xmm3
        cmp     r11d,11
        jb      NEAR $L$aesenclast1
        movups  xmm4,XMMWORD[64+rcx]
        aesenc  xmm6,xmm5
        movups  xmm5,XMMWORD[80+rcx]
        aesenc  xmm6,xmm4
        je      NEAR $L$aesenclast1
        movups  xmm4,XMMWORD[96+rcx]
        aesenc  xmm6,xmm5
        movups  xmm5,XMMWORD[112+rcx]
        aesenc  xmm6,xmm4
$L$aesenclast1:
        aesenclast      xmm6,xmm5
        movups  xmm4,XMMWORD[((16-112))+rcx]
        nop
DB      15,56,203,202
        movups  xmm14,XMMWORD[16+rdi]
        xorps   xmm14,xmm15
        movups  XMMWORD[rdi*1+rsi],xmm6
        xorps   xmm6,xmm14
        movups  xmm5,XMMWORD[((-80))+rcx]
        aesenc  xmm6,xmm4
        movdqa  xmm0,XMMWORD[((160-128))+rax]
        paddd   xmm0,xmm11
DB      69,15,56,205,227
DB      69,15,56,204,234
        movups  xmm4,XMMWORD[((-64))+rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movdqa  xmm3,xmm12
DB      102,65,15,58,15,219,4
        paddd   xmm13,xmm3
        movups  xmm5,XMMWORD[((-48))+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,202
        movdqa  xmm0,XMMWORD[((192-128))+rax]
        paddd   xmm0,xmm12
DB      69,15,56,205,236
DB      69,15,56,204,211
        movups  xmm4,XMMWORD[((-32))+rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movdqa  xmm3,xmm13
DB      102,65,15,58,15,220,4
        paddd   xmm10,xmm3
        movups  xmm5,XMMWORD[((-16))+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,202
        movdqa  xmm0,XMMWORD[((224-128))+rax]
        paddd   xmm0,xmm13
DB      69,15,56,205,213
DB      69,15,56,204,220
        movups  xmm4,XMMWORD[rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movdqa  xmm3,xmm10
DB      102,65,15,58,15,221,4
        paddd   xmm11,xmm3
        movups  xmm5,XMMWORD[16+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,202
        movdqa  xmm0,XMMWORD[((256-128))+rax]
        paddd   xmm0,xmm10
DB      69,15,56,205,218
DB      69,15,56,204,229
        movups  xmm4,XMMWORD[32+rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movdqa  xmm3,xmm11
DB      102,65,15,58,15,218,4
        paddd   xmm12,xmm3
        movups  xmm5,XMMWORD[48+rcx]
        aesenc  xmm6,xmm4
        cmp     r11d,11
        jb      NEAR $L$aesenclast2
        movups  xmm4,XMMWORD[64+rcx]
        aesenc  xmm6,xmm5
        movups  xmm5,XMMWORD[80+rcx]
        aesenc  xmm6,xmm4
        je      NEAR $L$aesenclast2
        movups  xmm4,XMMWORD[96+rcx]
        aesenc  xmm6,xmm5
        movups  xmm5,XMMWORD[112+rcx]
        aesenc  xmm6,xmm4
$L$aesenclast2:
        aesenclast      xmm6,xmm5
        movups  xmm4,XMMWORD[((16-112))+rcx]
        nop
DB      15,56,203,202
        movups  xmm14,XMMWORD[32+rdi]
        xorps   xmm14,xmm15
        movups  XMMWORD[16+rdi*1+rsi],xmm6
        xorps   xmm6,xmm14
        movups  xmm5,XMMWORD[((-80))+rcx]
        aesenc  xmm6,xmm4
        movdqa  xmm0,XMMWORD[((288-128))+rax]
        paddd   xmm0,xmm11
DB      69,15,56,205,227
DB      69,15,56,204,234
        movups  xmm4,XMMWORD[((-64))+rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movdqa  xmm3,xmm12
DB      102,65,15,58,15,219,4
        paddd   xmm13,xmm3
        movups  xmm5,XMMWORD[((-48))+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,202
        movdqa  xmm0,XMMWORD[((320-128))+rax]
        paddd   xmm0,xmm12
DB      69,15,56,205,236
DB      69,15,56,204,211
        movups  xmm4,XMMWORD[((-32))+rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movdqa  xmm3,xmm13
DB      102,65,15,58,15,220,4
        paddd   xmm10,xmm3
        movups  xmm5,XMMWORD[((-16))+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,202
        movdqa  xmm0,XMMWORD[((352-128))+rax]
        paddd   xmm0,xmm13
DB      69,15,56,205,213
DB      69,15,56,204,220
        movups  xmm4,XMMWORD[rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movdqa  xmm3,xmm10
DB      102,65,15,58,15,221,4
        paddd   xmm11,xmm3
        movups  xmm5,XMMWORD[16+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,202
        movdqa  xmm0,XMMWORD[((384-128))+rax]
        paddd   xmm0,xmm10
DB      69,15,56,205,218
DB      69,15,56,204,229
        movups  xmm4,XMMWORD[32+rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movdqa  xmm3,xmm11
DB      102,65,15,58,15,218,4
        paddd   xmm12,xmm3
        movups  xmm5,XMMWORD[48+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,202
        movdqa  xmm0,XMMWORD[((416-128))+rax]
        paddd   xmm0,xmm11
DB      69,15,56,205,227
DB      69,15,56,204,234
        cmp     r11d,11
        jb      NEAR $L$aesenclast3
        movups  xmm4,XMMWORD[64+rcx]
        aesenc  xmm6,xmm5
        movups  xmm5,XMMWORD[80+rcx]
        aesenc  xmm6,xmm4
        je      NEAR $L$aesenclast3
        movups  xmm4,XMMWORD[96+rcx]
        aesenc  xmm6,xmm5
        movups  xmm5,XMMWORD[112+rcx]
        aesenc  xmm6,xmm4
$L$aesenclast3:
        aesenclast      xmm6,xmm5
        movups  xmm4,XMMWORD[((16-112))+rcx]
        nop
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movdqa  xmm3,xmm12
DB      102,65,15,58,15,219,4
        paddd   xmm13,xmm3
        movups  xmm14,XMMWORD[48+rdi]
        xorps   xmm14,xmm15
        movups  XMMWORD[32+rdi*1+rsi],xmm6
        xorps   xmm6,xmm14
        movups  xmm5,XMMWORD[((-80))+rcx]
        aesenc  xmm6,xmm4
        movups  xmm4,XMMWORD[((-64))+rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,202

        movdqa  xmm0,XMMWORD[((448-128))+rax]
        paddd   xmm0,xmm12
DB      69,15,56,205,236
        movdqa  xmm3,xmm7
        movups  xmm5,XMMWORD[((-48))+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movups  xmm4,XMMWORD[((-32))+rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,202

        movdqa  xmm0,XMMWORD[((480-128))+rax]
        paddd   xmm0,xmm13
        movups  xmm5,XMMWORD[((-16))+rcx]
        aesenc  xmm6,xmm4
        movups  xmm4,XMMWORD[rcx]
        aesenc  xmm6,xmm5
DB      15,56,203,209
        pshufd  xmm0,xmm0,0x0e
        movups  xmm5,XMMWORD[16+rcx]
        aesenc  xmm6,xmm4
DB      15,56,203,202

        movups  xmm4,XMMWORD[32+rcx]
        aesenc  xmm6,xmm5
        movups  xmm5,XMMWORD[48+rcx]
        aesenc  xmm6,xmm4
        cmp     r11d,11
        jb      NEAR $L$aesenclast4
        movups  xmm4,XMMWORD[64+rcx]
        aesenc  xmm6,xmm5
        movups  xmm5,XMMWORD[80+rcx]
        aesenc  xmm6,xmm4
        je      NEAR $L$aesenclast4
        movups  xmm4,XMMWORD[96+rcx]
        aesenc  xmm6,xmm5
        movups  xmm5,XMMWORD[112+rcx]
        aesenc  xmm6,xmm4
$L$aesenclast4:
        aesenclast      xmm6,xmm5
        movups  xmm4,XMMWORD[((16-112))+rcx]
        nop

        paddd   xmm2,xmm9
        paddd   xmm1,xmm8

        dec     rdx
        movups  XMMWORD[48+rdi*1+rsi],xmm6
        lea     rdi,[64+rdi]
        jnz     NEAR $L$oop_shaext

        pshufd  xmm2,xmm2,0xb1
        pshufd  xmm3,xmm1,0x1b
        pshufd  xmm1,xmm1,0xb1
        punpckhqdq      xmm1,xmm2
DB      102,15,58,15,211,8

        movups  XMMWORD[r8],xmm6
        movdqu  XMMWORD[r9],xmm1
        movdqu  XMMWORD[16+r9],xmm2
        movaps  xmm6,XMMWORD[rsp]
        movaps  xmm7,XMMWORD[16+rsp]
        movaps  xmm8,XMMWORD[32+rsp]
        movaps  xmm9,XMMWORD[48+rsp]
        movaps  xmm10,XMMWORD[64+rsp]
        movaps  xmm11,XMMWORD[80+rsp]
        movaps  xmm12,XMMWORD[96+rsp]
        movaps  xmm13,XMMWORD[112+rsp]
        movaps  xmm14,XMMWORD[128+rsp]
        movaps  xmm15,XMMWORD[144+rsp]
        lea     rsp,[((8+160))+rsp]
$L$epilogue_shaext:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret

$L$SEH_end_aesni_cbc_sha256_enc_shaext:
EXTERN  __imp_RtlVirtualUnwind

ALIGN   16
;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler shared by the
; aesni_cbc_sha256_enc_{xop,avx,avx2,shaext} routines (every
; $L$SEH_info_* record in .xdata names it).
;
; In:    r8 = pointer read as a Win64 CONTEXT record
;        r9 = pointer read as a Win64 DISPATCHER_CONTEXT record
;        (the numeric offsets below follow those two layouts)
; Out:   eax = 1 (ExceptionContinueSearch)
;
; Works out where in the faulting routine Rip lies, rebuilds the
; non-volatile registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15) and
; the stack pointer as they were on entry to that routine, stores them
; into the CONTEXT, copies the CONTEXT to disp->ContextRecord and then
; calls RtlVirtualUnwind to continue unwinding into the caller.
;
; All callee-saved GPRs and the flags are pushed on entry and popped on
; exit, so the handler itself preserves them.
;-----------------------------------------------------------------------
se_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64                  ; 32-byte shadow space + 4 stack args for the call below

        mov     rax,QWORD[120+r8]       ; rax = context->Rax
        mov     rbx,QWORD[248+r8]       ; rbx = context->Rip

        mov     rsi,QWORD[8+r9]         ; rsi = disp->ImageBase
        mov     r11,QWORD[56+r9]        ; r11 = disp->HandlerData (two RVAs from .xdata)

        mov     r10d,DWORD[r11]         ; HandlerData[0] = RVA of the routine's prologue label
        lea     r10,[r10*1+rsi]         ; -> absolute address
        cmp     rbx,r10
        jb      NEAR $L$in_prologue     ; Rip before prologue label: frame not set up yet,
                                        ; context->Rax is used as the entry-time rsp
                                        ; NOTE(review): relies on the prologues (outside this
                                        ; view) keeping the entry rsp in rax -- confirm

        mov     rax,QWORD[152+r8]       ; rax = context->Rsp

        mov     r10d,DWORD[4+r11]       ; HandlerData[1] = RVA of the routine's epilogue label
        lea     r10,[r10*1+rsi]         ; -> absolute address
        cmp     rbx,r10
        jae     NEAR $L$in_prologue     ; Rip at/after epilogue label: rsp already unwound,
                                        ; only rdi/rsi remain to be fetched
        lea     r10,[aesni_cbc_sha256_enc_shaext]
        cmp     rbx,r10
        jb      NEAR $L$not_in_shaext   ; Rip below the shaext entry point => one of the other variants
                                        ; NOTE(review): presumes shaext is the last variant in .text

        ; shaext body: frame is just the xmm6-xmm15 save area at [rsp]
        ; followed by 8 bytes of padding (mirrors the shaext epilogue).
        lea     rsi,[rax]               ; source = saved xmm6..xmm15 at context->Rsp
        lea     rdi,[512+r8]            ; destination = &context->Xmm6
        mov     ecx,20                  ; 20 qwords = 10 xmm registers
        DD      0xa548f3fc              ; raw bytes FC F3 48 A5 = cld; rep movsq
        lea     rax,[168+rax]           ; step over the 160+8 byte frame -> entry-time rsp
        jmp     NEAR $L$in_prologue
$L$not_in_shaext:
        lea     r10,[$L$avx2_shortcut]
        cmp     rbx,r10
        jb      NEAR $L$not_in_avx2     ; Rip below the AVX2 code: rax already is the frame base

        ; NOTE(review): the two instructions below recompute the frame base
        ; for the AVX2 routine; presumably they mirror how that routine
        ; (outside this view) aligns and offsets its frame -- confirm there.
        and     rax,-256*4
        add     rax,448
$L$not_in_avx2:
        mov     rsi,rax                 ; keep frame base for the xmm copy below
        mov     rax,QWORD[((64+56))+rax]        ; load saved stack pointer kept at frame+120
                                                ; (stored by the xop/avx/avx2 prologues, not visible here)

        mov     rbx,QWORD[((-8))+rax]   ; the six GPRs saved just below that pointer
        mov     rbp,QWORD[((-16))+rax]
        mov     r12,QWORD[((-24))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     r15,QWORD[((-48))+rax]
        mov     QWORD[144+r8],rbx       ; context->Rbx
        mov     QWORD[160+r8],rbp       ; context->Rbp
        mov     QWORD[216+r8],r12       ; context->R12
        mov     QWORD[224+r8],r13       ; context->R13
        mov     QWORD[232+r8],r14       ; context->R14
        mov     QWORD[240+r8],r15       ; context->R15

        lea     rsi,[((64+64))+rsi]     ; source = xmm save area at frame+128
        lea     rdi,[512+r8]            ; destination = &context->Xmm6
        mov     ecx,20                  ; 20 qwords = xmm6..xmm15
        DD      0xa548f3fc              ; cld; rep movsq

$L$in_prologue:
        ; rax = stack pointer value at entry to the faulting routine.
        ; rdi/rsi are reloaded from the slots at 8 and 16 above it, the same
        ; slots the routines' "WIN64 epilogue" reads them back from.
        mov     rdi,QWORD[8+rax]
        mov     rsi,QWORD[16+rax]
        mov     QWORD[152+r8],rax       ; context->Rsp
        mov     QWORD[168+r8],rsi       ; context->Rsi
        mov     QWORD[176+r8],rdi       ; context->Rdi

        mov     rdi,QWORD[40+r9]        ; destination = disp->ContextRecord
        mov     rsi,r8                  ; source = the CONTEXT just patched
        mov     ecx,154                 ; 154 qwords = 1232 bytes, the whole CONTEXT
        DD      0xa548f3fc              ; cld; rep movsq

        ; Build the eight arguments for RtlVirtualUnwind.
        mov     rsi,r9                  ; rsi = disp (r9 is about to be reused as arg4)
        xor     rcx,rcx                 ; arg1 HandlerType = 0 (UNW_FLAG_NHANDLER)
        mov     rdx,QWORD[8+rsi]        ; arg2 = disp->ImageBase
        mov     r8,QWORD[rsi]           ; arg3 = disp->ControlPc
        mov     r9,QWORD[16+rsi]        ; arg4 = disp->FunctionEntry
        mov     r10,QWORD[40+rsi]       ; disp->ContextRecord
        lea     r11,[56+rsi]            ; &disp->HandlerData
        lea     r12,[24+rsi]            ; &disp->EstablisherFrame
        mov     QWORD[32+rsp],r10       ; arg5 = ContextRecord
        mov     QWORD[40+rsp],r11       ; arg6 = &HandlerData
        mov     QWORD[48+rsp],r12       ; arg7 = &EstablisherFrame
        mov     QWORD[56+rsp],rcx       ; arg8 = NULL (rcx is still zero)
        call    QWORD[__imp_RtlVirtualUnwind]

        mov     eax,1                   ; return ExceptionContinueSearch
        add     rsp,64
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
        DB      0F3h,0C3h               ;repret


; Win64 function table. One entry per routine variant, each made of three
; image-relative DWORDs: start of function, end of function, and the
; matching $L$SEH_info_* unwind record defined in .xdata.
section .pdata rdata align=4
        ; aesni_cbc_sha256_enc_xop
        DD      $L$SEH_begin_aesni_cbc_sha256_enc_xop wrt ..imagebase
        DD      $L$SEH_end_aesni_cbc_sha256_enc_xop wrt ..imagebase
        DD      $L$SEH_info_aesni_cbc_sha256_enc_xop wrt ..imagebase

        ; aesni_cbc_sha256_enc_avx
        DD      $L$SEH_begin_aesni_cbc_sha256_enc_avx wrt ..imagebase
        DD      $L$SEH_end_aesni_cbc_sha256_enc_avx wrt ..imagebase
        DD      $L$SEH_info_aesni_cbc_sha256_enc_avx wrt ..imagebase
        ; aesni_cbc_sha256_enc_avx2
        DD      $L$SEH_begin_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
        DD      $L$SEH_end_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
        DD      $L$SEH_info_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
        ; aesni_cbc_sha256_enc_shaext
        DD      $L$SEH_begin_aesni_cbc_sha256_enc_shaext wrt ..imagebase
        DD      $L$SEH_end_aesni_cbc_sha256_enc_shaext wrt ..imagebase
        DD      $L$SEH_info_aesni_cbc_sha256_enc_shaext wrt ..imagebase
; Unwind-info records referenced from .pdata. Every record has the same shape:
;   DB 9,0,0,0  - 4-byte header; first byte 9 = version 1 with the
;                 exception-handler flag set (1 | 1<<3), remaining bytes
;                 zero (no prologue size, no unwind codes, no frame register)
;   DD handler  - image-relative address of se_handler
;   DD a, b     - handler data: image-relative addresses of the routine's
;                 prologue and epilogue labels, which se_handler reads as
;                 HandlerData[0] and HandlerData[1]
section .xdata rdata align=8
ALIGN   8
$L$SEH_info_aesni_cbc_sha256_enc_xop:
DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase

$L$SEH_info_aesni_cbc_sha256_enc_avx:
DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
$L$SEH_info_aesni_cbc_sha256_enc_avx2:
DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase
$L$SEH_info_aesni_cbc_sha256_enc_shaext:
DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$prologue_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase
